In [None]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import matplotlib.pyplot as plt
from tqdm import tqdm
from torch.utils.data import DataLoader, Dataset
import random

In [None]:
# Run on the GPU whenever PyTorch can see one; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
device

'cuda'

In [None]:

# Character vocabularies for both scripts.
# The Devanagari Unicode block covers the decimal code points 2304..2431 (U+0900..U+097F).
# Source: https://en.wikipedia.org/wiki/Devanagari_(Unicode_block)
SOS_token, EOS_token = 0, 1

hindi_alphabets = list(map(chr, range(2304, 2432)))
english_alphabets = list(map(chr, range(ord("a"), ord("z") + 1)))
hindi_alphabet_size = len(hindi_alphabets)
english_alphabet_size = len(english_alphabets)

# Indices 0 and 1 are reserved for the special tokens, so real characters start at 2.
hindi_alpha2index = {"SOS": 0, "EOS": 1}
hindi_alpha2index.update((ch, pos) for pos, ch in enumerate(hindi_alphabets, start=2))
english_alpha2index = {"SOS": 0, "EOS": 1}
english_alpha2index.update((ch, pos) for pos, ch in enumerate(english_alphabets, start=2))

# The reverse lookups are the exact inverses of the tables above (same key order).
hindi_index2alpha = {pos: symbol for symbol, pos in hindi_alpha2index.items()}
english_index2alpha = {pos: symbol for symbol, pos in english_alpha2index.items()}

print("Hindi A2I:\n", hindi_alpha2index)
print("-"*100)
print("English A2I:\n", english_alpha2index)
print("-"*100)
print("*"*100)
print("-"*100)
print("Hindi I2A:\n", hindi_index2alpha)
print("-"*100)
print("English I2A:\n", english_index2alpha)

Hindi A2I:
 {'SOS': 0, 'EOS': 1, 'ऀ': 2, 'ँ': 3, 'ं': 4, 'ः': 5, 'ऄ': 6, 'अ': 7, 'आ': 8, 'इ': 9, 'ई': 10, 'उ': 11, 'ऊ': 12, 'ऋ': 13, 'ऌ': 14, 'ऍ': 15, 'ऎ': 16, 'ए': 17, 'ऐ': 18, 'ऑ': 19, 'ऒ': 20, 'ओ': 21, 'औ': 22, 'क': 23, 'ख': 24, 'ग': 25, 'घ': 26, 'ङ': 27, 'च': 28, 'छ': 29, 'ज': 30, 'झ': 31, 'ञ': 32, 'ट': 33, 'ठ': 34, 'ड': 35, 'ढ': 36, 'ण': 37, 'त': 38, 'थ': 39, 'द': 40, 'ध': 41, 'न': 42, 'ऩ': 43, 'प': 44, 'फ': 45, 'ब': 46, 'भ': 47, 'म': 48, 'य': 49, 'र': 50, 'ऱ': 51, 'ल': 52, 'ळ': 53, 'ऴ': 54, 'व': 55, 'श': 56, 'ष': 57, 'स': 58, 'ह': 59, 'ऺ': 60, 'ऻ': 61, '़': 62, 'ऽ': 63, 'ा': 64, 'ि': 65, 'ी': 66, 'ु': 67, 'ू': 68, 'ृ': 69, 'ॄ': 70, 'ॅ': 71, 'ॆ': 72, 'े': 73, 'ै': 74, 'ॉ': 75, 'ॊ': 76, 'ो': 77, 'ौ': 78, '्': 79, 'ॎ': 80, 'ॏ': 81, 'ॐ': 82, '॑': 83, '॒': 84, '॓': 85, '॔': 86, 'ॕ': 87, 'ॖ': 88, 'ॗ': 89, 'क़': 90, 'ख़': 91, 'ग़': 92, 'ज़': 93, 'ड़': 94, 'ढ़': 95, 'फ़': 96, 'य़': 97, 'ॠ': 98, 'ॡ': 99, 'ॢ': 100, 'ॣ': 101, '।': 102, '॥': 103, '०': 104, '१': 105, '२': 106, '३': 107, '४': 108, '५'

In [None]:
def _read_pairs(csv_path):
    """Read a headerless two-column CSV and label its columns English / Hindi."""
    raw = pd.read_csv(csv_path, header=None)
    return pd.DataFrame(np.array(raw), columns=["English", "Hindi"])


# The three splits ship as separate files next to the notebook.
data_train = _read_pairs("hin_train.csv")
data_val = _read_pairs("hin_valid.csv")
data_test = _read_pairs("hin_test.csv")
print(data_train.shape, data_val.shape, data_test.shape)
data_train.head()

(51200, 2) (4096, 2) (4096, 2)


Unnamed: 0,English,Hindi
0,shastragaar,शस्त्रागार
1,bindhya,बिन्द्या
2,kirankant,किरणकांत
3,yagyopaveet,यज्ञोपवीत
4,ratania,रटानिया


In [None]:
# Copy each language column of the training frame into its own NumPy array
# (English words first, Hindi words second) and display both.
data_train_X, data_train_y = (np.array(data_train[column]) for column in ("English", "Hindi"))
data_train_X, data_train_y

(array(['shastragaar', 'bindhya', 'kirankant', ..., 'asahmaton',
        'sulgaayin', 'anchuthengu'], dtype=object),
 array(['शस्त्रागार', 'बिन्द्या', 'किरणकांत', ..., 'असहमतों', 'सुलगायीं',
        'अंचुतेंगु'], dtype=object))

In [None]:
class Tokenize():
    """Character-level tokenizer for English -> Hindi (Devanagari) word pairs.

    ``Lang_From`` / ``Lang_To`` are the keys used to pull the source and target
    word out of a pair (e.g. DataFrame column names). The vocabularies themselves
    are fixed: lowercase a-z for the source side and the Devanagari Unicode block
    (code points 2304..2431) for the target side. Index 0 is the start-of-sequence
    token and index 1 the end-of-sequence token, which also serves as padding.

    Args:
        Lang_From: key of the source word inside a pair.
        Lang_To: key of the target word inside a pair.
        max_len: length every encoded word is padded to (default 30).
    """
    def __init__(self,Lang_From,Lang_To,max_len=30):
        # Devanagari block reference: https://en.wikipedia.org/wiki/Devanagari_(Unicode_block)
        self.L1 = Lang_From
        self.L2 = Lang_To
        self.SOS_token = 0
        self.EOS_token = 1
        self.max_len = max_len

        english_alphabets = [chr(code) for code in range(97, 123)]
        hindi_alphabets = [chr(code) for code in range(2304, 2432)]

        self.Lang_From_Alpha_2_Index = self._build_vocab(english_alphabets)
        self.Lang_To_Alpha_2_Index = self._build_vocab(hindi_alphabets)
        # Reverse tables are the exact inverses of the forward ones.
        self.Lang_From_Index_2_Alpha = {idx: sym for sym, idx in self.Lang_From_Alpha_2_Index.items()}
        self.Lang_To_Index_2_Alpha = {idx: sym for sym, idx in self.Lang_To_Alpha_2_Index.items()}

    def _build_vocab(self,alphabet):
        """Map the special tokens to 0/1 and every character of ``alphabet`` to 2, 3, ..."""
        vocab = {"SOS": self.SOS_token, "EOS": self.EOS_token}
        for offset, symbol in enumerate(alphabet):
            vocab[symbol] = offset + 2
        return vocab

    def tensorFromWord(self,Lang, word):
        """Encode ``word`` as a 1-D long tensor padded with EOS up to ``max_len``.

        ``Lang`` is "L1" for a source word (no SOS prefix) or "L2" for a target
        word (SOS is prepended). Words that already reach ``max_len`` entries are
        returned unpadded and untruncated. The tensor is created on the
        notebook-level ``device``.

        Raises:
            ValueError: if ``Lang`` is neither "L1" nor "L2".
            KeyError: if ``word`` contains a character outside the vocabulary.
        """
        if Lang == "L1":
            indexes = [self.Lang_From_Alpha_2_Index[letter] for letter in word]
        elif Lang == "L2":
            indexes = [self.SOS_token]+[self.Lang_To_Alpha_2_Index[letter] for letter in word]
        else:
            raise ValueError("Lang must be 'L1' or 'L2', got %r" % (Lang,))
        # A non-positive multiplier yields an empty list, so long words get no padding.
        indexes+=[self.EOS_token]*(self.max_len-len(indexes))
        return torch.tensor(indexes, dtype=torch.long, device=device)

    def tensorsFromPair(self,pair):
        """Encode one (source, target) pair; ``pair`` is indexed by the two language keys."""
        input_tensor = self.tensorFromWord("L1",pair[self.L1])
        target_tensor = self.tensorFromWord("L2",pair[self.L2])
        return (input_tensor, target_tensor)

    def tensorsFromData(self,Data):
        """Encode every row of the DataFrame ``Data`` into a list of tensor pairs."""
        Tensors_Val = []
        for i in tqdm(range(Data.shape[0])):
            Tensors_Val.append(self.tensorsFromPair(Data.iloc[i]))
        return Tensors_Val

    def WordFromtensors(self,Lang, word):
        """Decode a sequence of indices back into a string.

        SOS tokens are skipped and decoding stops at the first EOS, so anything a
        model emits after end-of-sequence (or the EOS padding) is ignored.

        Raises:
            ValueError: if ``Lang`` is neither "L1" nor "L2".
        """
        if Lang == "L1":
            table = self.Lang_From_Index_2_Alpha
        elif Lang == "L2":
            table = self.Lang_To_Index_2_Alpha
        else:
            raise ValueError("Lang must be 'L1' or 'L2', got %r" % (Lang,))
        # One bulk conversion instead of a .item() call (and device sync) per element.
        indices = word.tolist() if hasattr(word, "tolist") else [int(i) for i in word]
        letters = []
        for index in indices:
            if index == self.EOS_token:
                break
            if index == self.SOS_token:
                continue
            letters.append(table[index])
        return ''.join(letters)

    def PairFromtensors(self,pair):
        """Decode an (input_tensor, target_tensor) pair into (source_word, target_word)."""
        input_word = self.WordFromtensors("L1",pair[0])
        target_word = self.WordFromtensors("L2",pair[1])
        return (input_word, target_word)
                                             
                                        

In [None]:
# Sanity check: show the first training row (one English / Hindi pair).
print(data_train.iloc[0])

English    shastragaar
Hindi       शस्त्रागार
Name: 0, dtype: object


In [None]:
# Build the tokenizer once and encode all three splits (train, validation, test - in that order)
# into lists of (input_tensor, target_tensor) pairs.
T = Tokenize("English", "Hindi")
data_train_num, data_val_num, data_test_num = (
    T.tensorsFromData(split) for split in (data_train, data_val, data_test)
)

100%|██████████| 51200/51200 [00:09<00:00, 5305.20it/s]
100%|██████████| 4096/4096 [00:00<00:00, 6173.93it/s]
100%|██████████| 4096/4096 [00:00<00:00, 7931.27it/s]


In [None]:
# Inspect the first encoded training pair: (input indices, target indices).
data_train_num[0]

(tensor([20,  9,  2, 20, 21, 19,  2,  8,  2,  2, 19,  1,  1,  1,  1,  1,  1,  1,
          1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1], device='cuda:0'),
 tensor([ 0, 56, 58, 79, 38, 79, 50, 64, 25, 64, 50,  1,  1,  1,  1,  1,  1,  1,
          1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1], device='cuda:0'))

In [None]:
# Round-trip check: decode the first encoded pair back into its two words.
T.PairFromtensors(data_train_num[0])

('shastragaar', 'शस्त्रागार')

In [None]:
class CustomDataset(Dataset):
    """Thin ``Dataset`` wrapper around an indexable collection of samples."""

    def __init__(self, data):
        # Keep a reference to the underlying collection; nothing is copied.
        self.data = data

    def __len__(self):
        """Number of samples in the wrapped collection."""
        sample_count = len(self.data)
        return sample_count

    def __getitem__(self, idx):
        """Return the sample stored at position ``idx``."""
        sample = self.data[idx]
        return sample

In [None]:
# Wrap each encoded split so it can be handed to a DataLoader.
train_set, valid_set, test_set = map(
    CustomDataset, (data_train_num, data_val_num, data_test_num)
)

In [None]:
# Batch every split 64 pairs at a time; only the training split is shuffled.
train_data_set = DataLoader(train_set, batch_size=64, shuffle=True)
valid_data_set = DataLoader(valid_set, batch_size=64, shuffle=False)
test_data_set = DataLoader(test_set, batch_size=64, shuffle=False)

# Report the number of batches per split.
for loader in (train_data_set, valid_data_set, test_data_set):
    print(len(loader))

800
64
64


# Encoder-Decoder

In [None]:
class Encoder(nn.Module):
    """Embeds a source sequence and runs it through an RNN / GRU / LSTM stack.

    Args:
        input_size: size of the source vocabulary.
        embedding_size: dimensionality of the character embeddings.
        hidden_size: hidden state size of each recurrent layer.
        num_layers: number of stacked recurrent layers.
        dropouts: dropout probability, applied to the embeddings and (when
            ``num_layers > 1``) between recurrent layers.
        cell_type: one of "LSTM", "RNN" or "GRU".
        bidirectional: whether the recurrent stack is bidirectional.

    Raises:
        ValueError: if ``cell_type`` is not one of the supported names.
    """
    def __init__(self,input_size,embedding_size,hidden_size,num_layers, dropouts,cell_type,bidirectional):
        super(Encoder,self).__init__()
        rnn_classes = {"LSTM": nn.LSTM, "RNN": nn.RNN, "GRU": nn.GRU}
        if cell_type not in rnn_classes:
            # Fail at construction time instead of leaving self.rnn undefined.
            raise ValueError("cell_type must be one of %s, got %r" % (sorted(rnn_classes), cell_type))
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.dropout = nn.Dropout(dropouts)
        self.embedding = nn.Embedding(input_size,embedding_size)
        self.cell_type = cell_type
        self.bidirectional = bidirectional
        # Inter-layer dropout only makes sense for stacked layers; with a single layer
        # the recurrent module is built with its default of no dropout.
        inter_layer_dropout = dropouts if num_layers > 1 else 0
        self.rnn = rnn_classes[cell_type](embedding_size,hidden_size,num_layers,dropout=inter_layer_dropout,bidirectional=self.bidirectional)

    def forward(self,x):
        """Encode ``x`` of shape (seq_length, N) and return ``(hidden, cell)``.

        ``cell`` is the LSTM cell state, or ``None`` for RNN / GRU cells. The
        per-step outputs of the recurrent stack are discarded.
        """
        # x : (seq_length, N)
        embedding = self.dropout(self.embedding(x))
        # embedding : (seq_length, N, embedding_size)
        if self.cell_type == "LSTM":
            outputs,(hidden,cell) = self.rnn(embedding)
        else:
            outputs,hidden = self.rnn(embedding)
            cell = None
        return hidden,cell
            

In [None]:
class Decoder(nn.Module):
    """Single-step decoder: embeds one token per batch element, advances the
    recurrent state by one step and returns log-probabilities over the output
    vocabulary.

    Args:
        input_size: size of the target vocabulary fed into the embedding.
        embedding_size: dimensionality of the character embeddings.
        hidden_size: hidden state size of each recurrent layer.
        output_size: number of classes produced by the final linear layer.
        num_layers: number of stacked recurrent layers.
        dropouts: dropout probability, applied to the embeddings and (when
            ``num_layers > 1``) between recurrent layers.
        cell_type: one of "LSTM", "RNN" or "GRU".
        bidirectional: whether the recurrent stack is bidirectional.

    Raises:
        ValueError: if ``cell_type`` is not one of the supported names.
    """
    def __init__(self,input_size,embedding_size,hidden_size,output_size,num_layers,dropouts,cell_type,bidirectional):
        super(Decoder,self).__init__()
        rnn_classes = {"LSTM": nn.LSTM, "RNN": nn.RNN, "GRU": nn.GRU}
        if cell_type not in rnn_classes:
            # Fail at construction time instead of leaving self.rnn undefined.
            raise ValueError("cell_type must be one of %s, got %r" % (sorted(rnn_classes), cell_type))
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.dropout = nn.Dropout(dropouts)
        self.embedding = nn.Embedding(input_size,embedding_size)
        self.cell_type = cell_type
        self.bidirectional = bidirectional
        # Inter-layer dropout only makes sense for stacked layers; with a single layer
        # the recurrent module is built with its default of no dropout.
        inter_layer_dropout = dropouts if num_layers > 1 else 0
        self.rnn = rnn_classes[cell_type](embedding_size,hidden_size,num_layers,dropout=inter_layer_dropout,bidirectional=self.bidirectional)
        # A bidirectional stack concatenates both directions, doubling the feature size.
        num_directions = 2 if self.bidirectional else 1
        self.fc = nn.Linear(num_directions*hidden_size,output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self,x,hidden,cell):
        """Run one decoding step.

        Args:
            x: token indices of shape (N,).
            hidden: recurrent hidden state to continue from.
            cell: LSTM cell state (ignored for RNN / GRU cells).

        Returns:
            ``(predictions, hidden, cell)`` where ``predictions`` holds
            log-probabilities of shape (N, output_size) and ``cell`` is ``None``
            for RNN / GRU cells.
        """
        # x : (N) but the recurrent module expects (1, N)
        x = x.unsqueeze(0)
        embedding = self.dropout(self.embedding(x))
        # embedding : (1, N, embedding_size)
        if self.cell_type == "LSTM":
            outputs,(hidden,cell) = self.rnn(embedding,(hidden,cell))
        else:
            outputs,hidden = self.rnn(embedding,hidden)
            cell = None
        # outputs : (1, N, num_directions*hidden_size)
        predictions = self.fc(outputs)
        # predictions : (1, N, output_size) -> drop the length-1 time axis before normalising
        predictions = self.softmax(predictions[0])

        return predictions,hidden,cell


In [None]:
class Seq2Seq(nn.Module):
    """Encoder-decoder wrapper that decodes one target position at a time.

    The encoder must return ``(hidden, cell)`` and the decoder must accept
    ``(x, hidden, cell)`` and return ``(predictions, hidden, cell)``. The size of
    the output vocabulary is read from the decoder's final linear layer
    (``decoder.fc``).
    """
    def __init__(self,encoder,decoder):
        super(Seq2Seq,self).__init__()
        self.encoder = encoder
        self.decoder = decoder
        # Length of the most recent training target; reused by predict() as its default.
        self.target_len = None

    def _target_vocab_size(self):
        """Number of classes the decoder predicts per step."""
        return self.decoder.fc.out_features

    def forward(self,source,target,teacher_forcing=0.5):
        """Decode with teacher forcing.

        Args:
            source: source indices of shape (source_len, N).
            target: target indices of shape (target_len, N); row 0 is the start token.
            teacher_forcing: probability of feeding the ground-truth token (rather
                than the model's own best guess) as the next decoder input.

        Returns:
            Tensor of shape (target_len, N, vocab) with the decoder output per
            step. Row 0 is left as zeros because no prediction is made for the
            start token.
        """
        batch_size = source.shape[1]
        self.target_len = target.shape[0]

        # Allocate on the same device as the inputs instead of relying on a global.
        outputs = torch.zeros(self.target_len,batch_size,self._target_vocab_size(),device=source.device)
        hidden,cell = self.encoder(source)

        # Start Token
        x = target[0]
        for t in range(1,self.target_len):
            output,hidden,cell = self.decoder(x,hidden,cell)
            outputs[t] = output
            best_guess = output.argmax(1)
            x = target[t] if random.random() < teacher_forcing else best_guess
        return outputs

    def predict(self,source,max_len=None):
        """Greedy decoding without teacher forcing.

        Runs in evaluation mode (dropout disabled) and without gradient tracking;
        the module's previous train/eval mode is restored afterwards.

        Args:
            source: source indices of shape (source_len, N).
            max_len: number of output positions. Defaults to the target length
                seen by the last ``forward`` call, or to ``source_len`` when
                ``forward`` has never been called.

        Returns:
            Tensor of shape (max_len, N, vocab); row 0 is left as zeros.
        """
        if max_len is None:
            seen_len = getattr(self,"target_len",None)
            max_len = seen_len if seen_len is not None else source.shape[0]
        batch_size = source.shape[1]

        was_training = self.training
        self.eval()
        try:
            with torch.no_grad():
                outputs = torch.zeros(max_len,batch_size,self._target_vocab_size(),device=source.device)
                hidden,cell = self.encoder(source)

                # Start Token: index 0 for every sequence in the batch
                x = torch.zeros_like(source[0])
                for t in range(1,max_len):
                    output,hidden,cell = self.decoder(x,hidden,cell)
                    outputs[t] = output
                    x = output.argmax(1)
        finally:
            self.train(was_training)
        return outputs
        

In [None]:
%pip install -qU wandb

In [None]:
# Imported here rather than in the first cell because wandb is installed by the preceding cell.
import wandb
# Log this session in to Weights & Biases before any run or sweep is created.
wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33mme19b031[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [None]:
# Start a W&B run inside the assignment project. No explicit name is passed,
# so W&B assigns a random one.
wandb.init(project="ME19B031_CS6910_ASSIGNMENT_3_Final")

In [None]:
def train_and_tune(config=None):
    """Train one Seq2Seq model for a sweep configuration and log metrics to W&B.

    Intended to be called by ``wandb.agent``; the hyperparameters are then
    supplied by the sweep controller through ``wandb.config``. Relies on the
    notebook-level ``train_set``, ``valid_set``, ``english_alpha2index``,
    ``hindi_alpha2index``, ``device`` and the tokenizer ``T``.

    Per epoch it logs ``train_loss`` and ``val_loss`` (mean loss per batch) and
    ``val_accuracy`` (fraction of validation words decoded exactly right).
    """
    # Initialize a new wandb run
    with wandb.init(config=config):
        # If called by wandb.agent, this config is set by the Sweep Controller
        config = wandb.config
        wandb.run.name = (
            f"es_{config.input_embedding_size}"
            f"-nel_{config.number_of_encoder_layers}"
            f"-ndl_{config.number_of_decoder_layers}"
            f"-hls_{config.hidden_layer_size}"
            f"-ct_{config.cell_type}"
            f"-bd_{config.bidirectional}"
            f"-do_{config.dropout}"
            f"-lr_{config.learning_rate}"
            f"-bs_{config.batch_size}"
        )

        # Training Params
        num_epochs = 10
        learning_rate = config.learning_rate
        batch_size = config.batch_size

        # Build the loaders here so the swept batch size is actually used.
        train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
        valid_loader = DataLoader(valid_set, batch_size=batch_size, shuffle=False)

        # Model Params
        input_size_encoder = len(english_alpha2index)
        input_size_decoder = len(hindi_alpha2index)
        output_size = len(hindi_alpha2index)
        encoder_embeddings_size = config.input_embedding_size
        decoder_embeddings_size = config.input_embedding_size
        hidden_size = config.hidden_layer_size
        num_enc_layers = config.number_of_encoder_layers
        enc_dropout = config.dropout
        dec_dropout = config.dropout
        cell_type = config.cell_type
        bidirectional = config.bidirectional

        encoder_net = Encoder(input_size_encoder,encoder_embeddings_size,hidden_size,num_enc_layers,enc_dropout,cell_type,bidirectional).to(device)
        # NOTE(review): the decoder is built with the ENCODER depth, so
        # config.number_of_decoder_layers only shows up in the run name. The encoder's
        # hidden state is handed to the decoder unchanged, which requires equal depth;
        # honouring a different decoder depth would need a bridging step in Seq2Seq.
        decoder_net = Decoder(input_size_decoder,decoder_embeddings_size,hidden_size,output_size,num_enc_layers,dec_dropout,cell_type,bidirectional).to(device)

        model = Seq2Seq(encoder_net,decoder_net).to(device)
        optimizer = optim.Adam(model.parameters(),lr = learning_rate)
        # The decoder already emits log-probabilities; CrossEntropyLoss re-applies
        # log-softmax, which leaves already-normalised log-probabilities unchanged.
        criterion = nn.CrossEntropyLoss()

        for epoch in range(num_epochs):
            # ---- training ----
            model.train()
            epoch_loss = 0
            for batch in train_loader:
                # batches arrive as (N, seq_len); the model expects (seq_len, N)
                inp_data = batch[0].T.to(device)
                target = batch[1].T.to(device)
                output = model(inp_data,target)
                # output : (trg_len, batch_size, output_dim); position 0 (start token) is skipped
                output = output[1:].reshape(-1,output.shape[2])
                target = target[1:].reshape(-1)

                optimizer.zero_grad()
                loss = criterion(output,target)
                loss.backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(),max_norm = 1)
                optimizer.step()
                epoch_loss += loss.item()
            Train_epoch_loss = epoch_loss/len(train_loader)

            # ---- validation: dropout off, no gradient tracking ----
            model.eval()
            Total = 0
            crct = 0
            Val_epoch_loss = 0
            with torch.no_grad():
                for batch in valid_loader:
                    inp_data = batch[0].T.to(device)
                    target = batch[1].T.to(device)
                    output = model.predict(inp_data)
                    # predictions : (trg_len, batch_size); no squeeze, so a batch of one still works
                    predictions = output.argmax(2)
                    output = output[1:].reshape(-1,output.shape[2])
                    target = target[1:].reshape(-1)
                    loss = criterion(output,target)
                    Val_epoch_loss += loss.item()
                    for i in range(batch[1].shape[0]):
                        predicted_word = T.WordFromtensors("L2",predictions[:, i])
                        target_word = T.WordFromtensors("L2",batch[1][i])
                        Total+=1
                        if predicted_word == target_word:
                            crct +=1
            Val_epoch_loss=Val_epoch_loss/len(valid_loader)
            Val_Accuracy = crct/Total if Total else 0.0
            wandb.log({"train_loss":Train_epoch_loss,"val_loss":Val_epoch_loss,"val_accuracy": Val_Accuracy})



In [None]:
import pprint

# Candidate values for every hyperparameter the sweep may pick from.
# NOTE: train_and_tune builds the decoder with number_of_encoder_layers, so
# number_of_decoder_layers only affects the run name.
search_space = {
    'learning_rate': [0.001, 0.0001],
    'batch_size': [32, 64],
    'input_embedding_size': [256, 512],
    'number_of_encoder_layers': [1, 2, 3],
    'number_of_decoder_layers': [1, 2, 3],
    'hidden_layer_size': [256, 512],
    'cell_type': ["RNN", "GRU", "LSTM"],
    'bidirectional': [True, False],
    'dropout': [0.2, 0.3],
}

# Sweep using W&B's 'bayes' method, maximising the logged val_accuracy.
sweep_config = {
    'method': 'bayes',
    'metric': {'name': 'val_accuracy', 'goal': 'maximize'},
    'parameters': {name: {'values': choices} for name, choices in search_space.items()},
}
pprint.pprint(sweep_config)
sweep_id = wandb.sweep(sweep_config, project="ME19B031_CS6910_ASSIGNMENT_3_Final")

{'method': 'bayes',
 'metric': {'goal': 'maximize', 'name': 'val_accuracy'},
 'parameters': {'batch_size': {'values': [32, 64]},
                'bidirectional': {'values': [True, False]},
                'cell_type': {'values': ['RNN', 'GRU', 'LSTM']},
                'dropout': {'values': [0.2, 0.3]},
                'hidden_layer_size': {'values': [256, 512]},
                'input_embedding_size': {'values': [256, 512]},
                'learning_rate': {'values': [0.001, 0.0001]},
                'number_of_decoder_layers': {'values': [1, 2, 3]},
                'number_of_encoder_layers': {'values': [1, 2, 3]}}}
Create sweep with ID: gy54guqp
Sweep URL: https://wandb.ai/me19b031/ME19B031_CS6910_ASSIGNMENT_3_Final/sweeps/gy54guqp


In [None]:
# Start a sweep agent that calls train_and_tune once per configuration, for at most 30 runs.
wandb.agent(sweep_id, train_and_tune,count=30)

[34m[1mwandb[0m: Agent Starting Run: cav2kyvb with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	hidden_layer_size: 256
[34m[1mwandb[0m: 	input_embedding_size: 256
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_decoder_layers: 2
[34m[1mwandb[0m: 	number_of_encoder_layers: 2


0,1
train_loss,█▅▄▃▂▂▂▁▁▁
val_accuracy,▁▂▂▃▄▅▅▆██
val_loss,█▆▅▄▃▂▂▂▁▁

0,1
train_loss,0.45775
val_accuracy,0.08667
val_loss,0.42103


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ziek4kst with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	hidden_layer_size: 512
[34m[1mwandb[0m: 	input_embedding_size: 256
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_decoder_layers: 2
[34m[1mwandb[0m: 	number_of_encoder_layers: 2


0,1
train_loss,█▅▄▃▂▂▂▁▁▁
val_accuracy,▁▁▂▃▅▅▆▇▇█
val_loss,█▆▄▃▂▂▂▁▁▁

0,1
train_loss,0.27095
val_accuracy,0.25317
val_loss,0.31162


[34m[1mwandb[0m: Agent Starting Run: 2ax5cnhm with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	hidden_layer_size: 512
[34m[1mwandb[0m: 	input_embedding_size: 256
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_decoder_layers: 2
[34m[1mwandb[0m: 	number_of_encoder_layers: 1


0,1
train_loss,█▄▃▃▂▂▂▁▁▁
val_accuracy,▁▄▅▆▇▇██▇█
val_loss,█▃▂▁▂▂▂▃▁▃

0,1
train_loss,0.12167
val_accuracy,0.37134
val_loss,0.28701


[34m[1mwandb[0m: Agent Starting Run: aelrcyek with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	hidden_layer_size: 512
[34m[1mwandb[0m: 	input_embedding_size: 512
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_decoder_layers: 1
[34m[1mwandb[0m: 	number_of_encoder_layers: 3


0,1
train_loss,█▃▂▂▁▁▁▁▁▁
val_accuracy,▁▅▆▆▆▇▆▆█▇
val_loss,█▅▃▃▂▁▁▂▂▂

0,1
train_loss,0.4332
val_accuracy,0.10229
val_loss,0.43212


[34m[1mwandb[0m: Agent Starting Run: 0xtlwh4i with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	hidden_layer_size: 512
[34m[1mwandb[0m: 	input_embedding_size: 256
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_decoder_layers: 2
[34m[1mwandb[0m: 	number_of_encoder_layers: 3


0,1
train_loss,█▄▃▂▂▂▂▁▁▁
val_accuracy,▁▄▆▆▇▇██▇█
val_loss,▇▃▁▁▁▃▃▄▆█

0,1
train_loss,0.07437
val_accuracy,0.39551
val_loss,0.31093


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: mvm5zew8 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	hidden_layer_size: 512
[34m[1mwandb[0m: 	input_embedding_size: 512
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_decoder_layers: 3
[34m[1mwandb[0m: 	number_of_encoder_layers: 3


0,1
train_loss,█▄▂▂▂▂▁▁▁▁
val_accuracy,▁▄▆▆▇▇████
val_loss,█▃▂▂▁▁▁▁▁▁

0,1
train_loss,0.13984
val_accuracy,0.35229
val_loss,0.27848


[34m[1mwandb[0m: Agent Starting Run: vdjzf2m3 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	hidden_layer_size: 512
[34m[1mwandb[0m: 	input_embedding_size: 256
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_decoder_layers: 3
[34m[1mwandb[0m: 	number_of_encoder_layers: 2


0,1
train_loss,█▆▄▃▂▂▂▁▁▁
val_accuracy,▁▁▂▄▅▆▇▇▇█
val_loss,█▆▃▂▂▂▁▁▁▁

0,1
train_loss,0.25033
val_accuracy,0.26147
val_loss,0.29835


[34m[1mwandb[0m: Agent Starting Run: v8gnqfdw with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	hidden_layer_size: 512
[34m[1mwandb[0m: 	input_embedding_size: 512
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_decoder_layers: 3
[34m[1mwandb[0m: 	number_of_encoder_layers: 2


0,1
train_loss,█▃▃▂▂▂▁▁▁▁
val_accuracy,▁▄▅▆▇▇████
val_loss,█▄▃▂▂▂▁▁▁▁

0,1
train_loss,0.15922
val_accuracy,0.34326
val_loss,0.27737


[34m[1mwandb[0m: Agent Starting Run: 10jql9fw with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	hidden_layer_size: 512
[34m[1mwandb[0m: 	input_embedding_size: 512
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_decoder_layers: 2
[34m[1mwandb[0m: 	number_of_encoder_layers: 3


0,1
train_loss,█▄▂▂▂▂▁▁▁▁
val_accuracy,▁▄▆▇▇▇████
val_loss,█▃▂▁▁▁▁▁▁▁

0,1
train_loss,0.13144
val_accuracy,0.36523
val_loss,0.28791


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 0g2jugw1 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	hidden_layer_size: 512
[34m[1mwandb[0m: 	input_embedding_size: 512
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_decoder_layers: 3
[34m[1mwandb[0m: 	number_of_encoder_layers: 1


0,1
train_loss,█▄▃▂▂▂▂▁▁▁
val_accuracy,▁▃▄▆▆▇▇███
val_loss,█▅▃▃▂▂▁▁▁▁

0,1
train_loss,0.25018
val_accuracy,0.27686
val_loss,0.3163


[34m[1mwandb[0m: Agent Starting Run: 09t10jaq with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	hidden_layer_size: 512
[34m[1mwandb[0m: 	input_embedding_size: 512
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_decoder_layers: 3
[34m[1mwandb[0m: 	number_of_encoder_layers: 3


0,1
train_loss,█▄▃▂▂▂▂▁▁▁
val_accuracy,▁▄▆▇▇▇▇███
val_loss,█▅▂▁▄▃▅▅▆█

0,1
train_loss,0.08943
val_accuracy,0.38281
val_loss,0.30961


[34m[1mwandb[0m: Agent Starting Run: 3fhoepea with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	hidden_layer_size: 512
[34m[1mwandb[0m: 	input_embedding_size: 256
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_decoder_layers: 3
[34m[1mwandb[0m: 	number_of_encoder_layers: 3


0,1
train_loss,█▄▃▂▂▂▂▁▁▁
val_accuracy,▁▄▆▇▇▇▇▇▇█
val_loss,▇▄▁▁▂▂▃▅▇█

0,1
train_loss,0.0749
val_accuracy,0.39575
val_loss,0.30515


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 7xqnbleg with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	hidden_layer_size: 512
[34m[1mwandb[0m: 	input_embedding_size: 256
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_decoder_layers: 2
[34m[1mwandb[0m: 	number_of_encoder_layers: 3


VBox(children=(Label(value='0.009 MB of 0.009 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.920307…

0,1
train_loss,█▄▃▂▂▂▂▁▁▁
val_accuracy,▁▄▅▇█████▇
val_loss,█▄▃▁▂▃▂▅██

0,1
train_loss,0.07637
val_accuracy,0.38037
val_loss,0.3023


[34m[1mwandb[0m: Agent Starting Run: qyvclycw with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	hidden_layer_size: 512
[34m[1mwandb[0m: 	input_embedding_size: 512
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_decoder_layers: 3
[34m[1mwandb[0m: 	number_of_encoder_layers: 3


VBox(children=(Label(value='0.009 MB of 0.009 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.920291…

0,1
train_loss,█▄▃▂▂▂▂▁▁▁
val_accuracy,▁▄▆▇▇█▇███
val_loss,█▃▂▁▃▂▃▅▄▅

0,1
train_loss,0.08843
val_accuracy,0.38599
val_loss,0.28881


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: qz8qsnky with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	hidden_layer_size: 512
[34m[1mwandb[0m: 	input_embedding_size: 256
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_decoder_layers: 3
[34m[1mwandb[0m: 	number_of_encoder_layers: 1


0,1
train_loss,█▄▃▂▂▂▁▁▁▁
val_accuracy,▁▃▅▆▇▇████
val_loss,█▄▂▂▂▁▁▁▁▁

0,1
train_loss,0.20013
val_accuracy,0.30396
val_loss,0.30064


[34m[1mwandb[0m: Agent Starting Run: eiarn9vh with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	hidden_layer_size: 512
[34m[1mwandb[0m: 	input_embedding_size: 256
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_decoder_layers: 3
[34m[1mwandb[0m: 	number_of_encoder_layers: 3


VBox(children=(Label(value='0.001 MB of 0.009 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.121245…

0,1
train_loss,█▄▃▂▂▂▂▁▁▁
val_accuracy,▁▅▆▇▇▇▇███
val_loss,▆▃▃▁▁▂▄▃▆█

0,1
train_loss,0.07719
val_accuracy,0.39917
val_loss,0.3071


[34m[1mwandb[0m: Agent Starting Run: rup4ipnf with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	hidden_layer_size: 512
[34m[1mwandb[0m: 	input_embedding_size: 256
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_decoder_layers: 2
[34m[1mwandb[0m: 	number_of_encoder_layers: 2


0,1
train_loss,█▄▃▃▂▂▂▁▁▁
val_accuracy,▁▅▆▆▇▇█▇▇█
val_loss,█▃▃▂▁▃▄▅▆▆

0,1
train_loss,0.08075
val_accuracy,0.3916
val_loss,0.29755


[34m[1mwandb[0m: Agent Starting Run: g79hwftn with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	hidden_layer_size: 512
[34m[1mwandb[0m: 	input_embedding_size: 256
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_decoder_layers: 3
[34m[1mwandb[0m: 	number_of_encoder_layers: 2


VBox(children=(Label(value='0.001 MB of 0.009 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.121258…

0,1
train_loss,█▄▃▃▂▂▂▁▁▁
val_accuracy,▁▄▆▇▇█▇▇█▇
val_loss,█▃▂▁▂▁▄▄▆▆

0,1
train_loss,0.08233
val_accuracy,0.37061
val_loss,0.29297


[34m[1mwandb[0m: Agent Starting Run: btea6ond with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	hidden_layer_size: 512
[34m[1mwandb[0m: 	input_embedding_size: 256
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_decoder_layers: 1
[34m[1mwandb[0m: 	number_of_encoder_layers: 1


Error in callback <function _WandbInit._pause_backend at 0x7f0916b9cdc0> (for post_run_cell):


BrokenPipeError: ignored