## CS6910 Assignment 3
#### This code file contains all the classes and functions
#### needed to train the sequence-to-sequence model without an attention mechanism.
#### The reference sources I used to write the code are listed in the README.md file.

## Importing the required Libraries

In [1]:
import os
import pandas as pd
import torch
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import DataLoader, Dataset

import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import random
import csv
from prettytable import PrettyTable

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Bare expression: only has a visible effect as a notebook cell echo.
device

device(type='cuda')

In [45]:
#pip install prettytable

## Argparse

In [19]:
# Command-line configuration.
# Every option has a default, so the script also runs without any arguments.
# The parsed values are copied into module-level names that the rest of the
# file reads (wandb_project, wandb_entity, wandb_sweep, num_epochs,
# hidden_size, cell_type, num_layers, embedding_size, bi_directional).

import argparse


def _str2bool(value):
    """Convert a command-line token such as "True", "false", "1" or "no" to bool.

    argparse's ``type=bool`` cannot be used for this: ``bool("False")`` is
    ``True`` because every non-empty string is truthy.

    Raises:
        argparse.ArgumentTypeError: if the token is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    text = str(value).strip().lower()
    if text in ("true", "t", "yes", "y", "1"):
        return True
    if text in ("false", "f", "no", "n", "0"):
        return False
    raise argparse.ArgumentTypeError("expected a boolean value, got {!r}".format(value))


parser = argparse.ArgumentParser(description="Stores all the hyperpamaters for the model.")
parser.add_argument("-wp", "--wandb_project", default="cs6910_assignment 3 new", type=str,
                    help="Enter the Name of your Wandb Project")
parser.add_argument("-we", "--wandb_entity", default="am22s020", type=str,
                    help="Wandb Entity used to track experiments in the Weights & Biases dashboard.")
parser.add_argument("-ws", "--wandb_sweep", default=False, type=_str2bool,
                    help="If you want to run wandb sweep then give True")
parser.add_argument("-e", "--epochs", default=1, type=int, choices=[1, 5, 10],
                    help="Number of epochs to train neural network.")
parser.add_argument("-hs", "--hidden_size", default=256, type=int,
                    help="no. of neurons in the hidden layer of the N/W")
parser.add_argument("-c", "--cell_type", default="lstm", type=str, choices=["lstm", "gru", "rnn"])
parser.add_argument("-nl", "--num_layers", default=2, type=int,
                    choices=[2, 3, 4], help="number of recurrent layers")
parser.add_argument("-ems", "--embedding_size", default=256, type=int, choices=[64, 128, 256])
parser.add_argument("-bd", "--bi_directional", default=True, type=_str2bool)

# parse_known_args instead of parse_args: arguments injected by the launcher
# (for example the "-f <connection file>" that Jupyter passes to the kernel)
# must not abort the run. Anything unrecognised is reported, then ignored.
args, _unknown_args = parser.parse_known_args()
if _unknown_args:
    print("Ignoring unrecognised arguments:", _unknown_args)

wandb_project = args.wandb_project
wandb_entity = args.wandb_entity
wandb_sweep = args.wandb_sweep
num_epochs = args.epochs
hidden_size = args.hidden_size
cell_type = args.cell_type
num_layers = args.num_layers
embedding_size = args.embedding_size
bi_directional = args.bi_directional

print("wandb_project :", wandb_project , "wandb_entity: ", wandb_entity,"wandb_sweep: ",wandb_sweep,
      "epochs: ",num_epochs,"hidden_size: ",hidden_size, "cell_type: ", cell_type,
      "num_layers: ",num_layers,"embedding_size: ",embedding_size, 
      "bi_directional: ", bi_directional)

## Preparing the datasets

In [2]:
class Vocabulary():
    """
    Character-level vocabulary mapping characters to integer ids and back.

    Ids 0-3 are reserved for the special tokens <PAD>, <SOW>, <EOW> and <UNK>.
    stoi maps a token to its id, itos maps an id back to its token.

    tokenizer(): splits a text into its individual characters.
    build_vocabulary(): assigns the next free id to every character of the
    given words that is not in the vocabulary yet. It can be called more
    than once; ids that were already assigned are never changed.
    numericalize(): converts a text into a list of ids, using the <UNK> id
    for characters that are not in the vocabulary.

    """
    def __init__(self):
        self.itos = {0: "<PAD>", 1: "<SOW>", 2: "<EOW>", 3: "<UNK>"}
        self.stoi = {"<PAD>": 0, "<SOW>": 1, "<EOW>": 2, "<UNK>": 3}

    def __len__(self):
        return len(self.itos)

    @staticmethod
    def tokenizer(text):
        return [*text]

    def build_vocabulary(self, word_list):
        for word in word_list:
            for char in self.tokenizer(word):
                # Dict membership is O(1), and taking the next id from the
                # current size keeps repeated calls from reusing ids.
                if char not in self.stoi:
                    idx = len(self.itos)
                    self.stoi[char] = idx
                    self.itos[idx] = char

    def numericalize(self, text):
        unk_index = self.stoi["<UNK>"]
        return [self.stoi.get(token, unk_index) for token in self.tokenizer(text)]
                 

In [3]:
class aksharantar(Dataset):
    """
    Dataset of (latin word, target-language word) pairs for transliteration.

    root_dir: the root directory where the data is stored
    out_lang: the target language; the file that is read is
              <root_dir>/<out_lang>/<out_lang>_<dataset_type>.csv
    dataset_type: the split name used in the file name, e.g. "train", "valid" or "test"

    __init__() loads the two-column CSV and builds one character vocabulary
    per column from the words of this split.
    __getitem__() returns a pair of 1-D tensors of character ids
    (latin, target), each wrapped in the <SOW> and <EOW> ids.

    """

    def __init__(self, root_dir, out_lang, dataset_type): 

        # Read file
        self.file_name = out_lang + "_" + dataset_type + ".csv"
        self.file_dir = os.path.join(root_dir, out_lang, self.file_name)
        # dtype=str + na_filter=False: every cell is kept as literal text.
        # With pandas' default NA detection, words such as "nan" or "null"
        # are parsed as float NaN and then break the character tokenizer.
        self.df = pd.read_csv(self.file_dir, names=["latin", "hindi"],
                              dtype=str, na_filter=False)

        # Get columns of input and output language
        self.latin = self.df["latin"]
        self.hindi = self.df["hindi"]

        # Vocabulary of the input (latin) side
        self.vocab_eng = Vocabulary()
        self.vocab_eng.build_vocabulary(self.latin.tolist())

        # Vocabulary of the output side
        self.vocab_hin = Vocabulary()
        self.vocab_hin.build_vocabulary(self.hindi.tolist())

    def __len__(self):
        return len(self.df)

    @staticmethod
    def _encode(vocab, word):
        """Return the ids of `word` wrapped in the <SOW> and <EOW> ids of `vocab`."""
        return [vocab.stoi["<SOW>"]] + vocab.numericalize(word) + [vocab.stoi["<EOW>"]]

    def __getitem__(self, index):
        # Positional access: `index` is a row position, not an index label.
        latin = self.latin.iloc[index]
        hindi = self.hindi.iloc[index]

        numericalized_latin = self._encode(self.vocab_eng, latin)
        numericalized_hindi = self._encode(self.vocab_hin, hindi)

        return torch.tensor(numericalized_latin), torch.tensor(numericalized_hindi)
               
        

In [4]:
class MyCollate:
    """
    Collate function object for the DataLoader.

    pad_idx_eng / pad_idx_hin are the padding values used for the input and
    target sequences respectively (the <PAD> ids of the two vocabularies).
    Calling the object with a list of (input, target) tensor pairs returns
    both sides padded to the longest sequence of the batch, laid out as
    (seq_length, batch_size).

    """
    def __init__(self, pad_idx_eng, pad_idx_hin):
        self.pad_idx_eng = pad_idx_eng
        self.pad_idx_hin = pad_idx_hin

    def __call__(self, batch):
        sources = []
        destinations = []
        for pair in batch:
            sources.append(pair[0])
            destinations.append(pair[1])

        padded_sources = pad_sequence(sources, batch_first=False,
                                      padding_value=self.pad_idx_eng)
        padded_destinations = pad_sequence(destinations, batch_first=False,
                                           padding_value=self.pad_idx_hin)
        return padded_sources, padded_destinations
        
        

In [5]:
def get_loader(root_dir, out_lang, dataset_type, batch_size, pin_memory=True ):
    """
    Build the dataset for one split and a DataLoader over it.

    Returns the pair (loader, dataset). The loader shuffles the data and
    pads every batch with the <PAD> ids of the dataset's two vocabularies.

    """
    dataset = aksharantar(root_dir, out_lang, dataset_type)

    collate = MyCollate(pad_idx_eng=dataset.vocab_eng.stoi["<PAD>"],
                        pad_idx_hin=dataset.vocab_hin.stoi["<PAD>"])

    loader = DataLoader(dataset=dataset,
                        batch_size=batch_size,
                        shuffle=True,
                        pin_memory=pin_memory,
                        collate_fn=collate)
    return loader, dataset


## Getting the model Ready

In [6]:
class Encoder(nn.Module):
    """
    Encoder of the sequence-to-sequence model.

    input_size: size of the input vocabulary (number of embedding rows).
    embedding_size: dimensionality of the character embeddings.
    hidden_size: number of units in each recurrent layer.
    num_layers: number of stacked recurrent layers.
    p: dropout probability, applied to the embeddings and between the
       recurrent layers.
    cell_type: 'gru', 'lstm' or 'rnn'.
    bi_directional: process the input in both directions.

    Only the recurrent module selected by cell_type is created. It is
    registered under the attribute of the same name (self.gru, self.lstm or
    self.rnn), so the parameter names of the active cell are the ones an
    Encoder holding all three cells would have. A checkpoint saved from such
    an Encoder also contains the two unused cells and needs strict=False
    (or filtering) to load.

    forward(x) takes ids of shape (seq_length, N) and returns the final
    state: `hidden` for gru/rnn, the tuple `(hidden, cell)` for lstm.

    Raises ValueError if cell_type is not one of the supported values.

    """

    def __init__(self, input_size, embedding_size, hidden_size, num_layers, p, cell_type, bi_directional):
        super(Encoder, self).__init__()
        cell_classes = {'gru': nn.GRU, 'lstm': nn.LSTM, 'rnn': nn.RNN}
        if cell_type not in cell_classes:
            raise ValueError("cell_type must be one of 'gru', 'lstm', 'rnn', got {!r}".format(cell_type))

        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.cell_type = cell_type
        self.dropout = nn.Dropout(p)
        self.embedding = nn.Embedding(input_size, embedding_size)

        # Inter-layer dropout has no effect with a single layer; passing 0
        # there only avoids the warning PyTorch emits for that combination.
        recurrent = cell_classes[cell_type](embedding_size, hidden_size, num_layers,
                                            dropout=p if num_layers > 1 else 0.0,
                                            bidirectional=bi_directional)
        setattr(self, cell_type, recurrent)

    def forward(self, x):
        # x, shape=(seq_length, N)
        embedding = self.dropout(self.embedding(x))
        # embedding shape = (seq_length, N, embedding_size)

        recurrent = getattr(self, self.cell_type)
        # The per-step outputs are not needed, only the final state:
        # a tensor for gru/rnn, a (hidden, cell) tuple for lstm.
        _, state = recurrent(embedding)
        return state
          
    # This method is called at the beginning of each new input sequence
    # to reset the hidden state.
#     def initHidden(self):
#         return torch.zeros(1, 1, self.hidden_size, device=device)

In [7]:
class Decoder(nn.Module):
    """
    Decoder of the sequence-to-sequence model; it decodes one time step per call.

    input_size: size of the target vocabulary (number of embedding rows).
    embedding_size: dimensionality of the character embeddings.
    hidden_size: number of units in each recurrent layer.
    output_size: number of scores produced per step.
    num_layers: number of stacked recurrent layers.
    p: dropout probability, applied to the embeddings and between the
       recurrent layers.
    cell_type: 'gru', 'lstm' or 'rnn'.
    bi_directional: build a bidirectional cell; the linear output layer
       then takes 2*hidden_size features.

    Only the recurrent module selected by cell_type is created. It is
    registered under the attribute of the same name (self.gru, self.lstm or
    self.rnn).

    forward(x, hidden, cell) takes the previous character ids x of shape (N),
    adds a leading length-1 sequence dimension, runs one step of the cell
    and projects the output to (N, output_size).
    It returns (predictions, hidden, cell) for lstm and
    (predictions, hidden) otherwise; `cell` is ignored for gru/rnn.

    Raises ValueError if cell_type is not one of the supported values.

    """
    def __init__(self, input_size, embedding_size, hidden_size, output_size, num_layers,
                 p, cell_type, bi_directional ):
        super(Decoder, self).__init__()
        cell_classes = {'gru': nn.GRU, 'lstm': nn.LSTM, 'rnn': nn.RNN}
        if cell_type not in cell_classes:
            raise ValueError("cell_type must be one of 'gru', 'lstm', 'rnn', got {!r}".format(cell_type))

        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.cell_type = cell_type
        self.dropout = nn.Dropout(p)
        self.embedding = nn.Embedding(input_size, embedding_size)

        # Inter-layer dropout has no effect with a single layer; passing 0
        # there only avoids the warning PyTorch emits for that combination.
        recurrent = cell_classes[cell_type](embedding_size, hidden_size, num_layers,
                                            dropout=p if num_layers > 1 else 0.0,
                                            bidirectional=bi_directional)
        setattr(self, cell_type, recurrent)

        # A bidirectional cell concatenates both directions in its output.
        directions = 2 if bi_directional else 1
        self.fc = nn.Linear(directions * hidden_size, output_size)

    def forward(self, x, hidden, cell=None):
        # x, shape=(N) but we want (1, N)
        x = x.unsqueeze(0)

        embedding = self.dropout(self.embedding(x))
        # embedding shape = (1, N, embedding_size)

        recurrent = getattr(self, self.cell_type)
        is_lstm = self.cell_type == 'lstm'
        if is_lstm:
            outputs, (hidden, cell) = recurrent(embedding, (hidden, cell))
        else:
            outputs, hidden = recurrent(embedding, hidden)

        # (1, N, output_size) -> (N, output_size)
        predictions = self.fc(outputs).squeeze(0)

        if is_lstm:
            return predictions, hidden, cell
        return predictions, hidden
    

In [8]:
class Seq2Seq(nn.Module):
    """
    Ties an encoder and a decoder together into one model.

    encoder: module returning the final state for a batch of input words
             (`hidden`, or `(hidden, cell)` when cell_type is 'lstm').
    decoder: module decoding one step at a time; it must expose its output
             layer as `fc`, whose `out_features` is used as the size of the
             target vocabulary.
    cell_type: 'lstm', 'gru' or 'rnn'; selects how the state is passed around.

    forward(word_input, word_target, teacher_force_ratio) returns a tensor of
    shape (target_length, batch_size, target_vocab_size) with the scores of
    every decoding step. Row 0 is left at zero because decoding starts from
    the first target row (the start token).

    """
    def __init__(self, encoder, decoder, cell_type):
        super(Seq2Seq, self).__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.cell_type = cell_type

    def forward(self, word_input, word_target, teacher_force_ratio=0.5):

        batch_size = word_input.shape[1]
        target_length = word_target.shape[0]

        # Size and place the output buffer from the model and its input
        # instead of from module-level globals, so the model works with any
        # dataset and on whatever device its inputs live on.
        vocab_size = self.decoder.fc.out_features
        outputs = torch.zeros(target_length, batch_size, vocab_size,
                              device=word_input.device)

        is_lstm = self.cell_type == 'lstm'
        if is_lstm:
            hidden, cell = self.encoder(word_input)
        else:
            hidden = self.encoder(word_input)
            cell = None

        # grab start token
        x = word_target[0]

        for t in range(1, target_length):
            if is_lstm:
                output, hidden, cell = self.decoder(x, hidden, cell)
            else:
                output, hidden = self.decoder(x, hidden, cell)

            outputs[t] = output

            best_pred = output.argmax(1)

            # Teacher forcing: feed the true character with probability
            # teacher_force_ratio, otherwise the model's own best guess.
            x = word_target[t] if random.random() < teacher_force_ratio else best_pred

        return outputs
    

## Functions to find accuracy and print and save outputs

In [9]:
def predict(model, input_list, cell_type, max_length=30):

    '''
    Greedily transliterate one word.

    model: trained Seq2Seq model; its own `cell_type` attribute decides how
           the recurrent state is handled.
    input_list: list of input tokens (characters and special-token strings).
    cell_type: kept for the callers' signature; not used.
    max_length: maximum number of decoding steps.

    Returns the list of output tokens, starting with "<SOW>" and ending with
    "<EOW>" unless max_length was reached first.

    Tokens are indexed with the vocabularies of the global `train_data`,
    because every split builds its own vocabulary and the model was trained
    with the training one. Input tokens that never occurred in the training
    data are mapped to <UNK> instead of raising KeyError.

    '''

    source_stoi = train_data.vocab_eng.stoi
    unk_index = source_stoi["<UNK>"]
    input_word = torch.LongTensor([source_stoi.get(char, unk_index) for char in input_list])

    # Input word is of shape (seq_length) but we want it to be (seq_length, 1) where 1 represents batch size
    input_word = input_word.view(input_word.shape[0], 1).to(device)

    start_token_index = train_data.vocab_hin.stoi["<SOW>"]
    end_token_index = train_data.vocab_hin.stoi["<EOW>"]

    is_lstm = model.cell_type == "lstm"

    # Add start token to outputs
    outputs = [start_token_index]

    with torch.no_grad():
        # Encoder
        if is_lstm:
            hidden, cell = model.encoder(input_word)
        else:
            hidden = model.encoder(input_word)

        # Decoder: feed the previous prediction back in, one step at a time
        for _ in range(max_length):
            prev_char = torch.LongTensor([outputs[-1]]).to(device)

            if is_lstm:
                output, hidden, cell = model.decoder(prev_char, hidden, cell)
            else:
                output, hidden = model.decoder(prev_char, hidden, 0)

            best_guess = output.argmax(1).item()
            outputs.append(best_guess)

            # Model predicts it's the end of the word
            if best_guess == end_token_index:
                break

    # Convert outputs to character list
    return [train_data.vocab_hin.itos[index] for index in outputs]


In [10]:
def accuracy(dataset, model, cell_type):

    """
    Word-level accuracy of the model on a dataset, in percent.

    Every word is predicted on its own with predict() and counts as correct
    only if the whole predicted token list (including the <SOW>/<EOW>
    markers) equals the target. This is done word by word, so it may take
    a while on large datasets.

    Returns 0.0 for an empty dataset.

    """

    # Number of words in our dataset
    words_count = len(dataset)
    if words_count == 0:
        # Nothing to evaluate; avoid dividing by zero below.
        return 0.0

    correct_count = 0

    for i in range(words_count):
        # Fetch the sample once; every access builds new tensors.
        sample = dataset[i]

        char_input = [dataset.vocab_eng.itos[index] for index in sample[0].tolist()]
        actual_word = [dataset.vocab_hin.itos[index] for index in sample[1].tolist()]

        prediction = predict(model, char_input, cell_type)

        if prediction == actual_word:
            correct_count += 1

    return 100*(correct_count/words_count)

In [11]:
def prediction_csv(dataset, model, cell_type, file_name='predictions_vanilla.csv'):

    """
    Write a csv file with the input, the prediction and the target of every
    word of the dataset.

    dataset: dataset whose words are predicted
    model: Trained model
    cell_type: forwarded to predict()
    file_name: path of the csv file to write (overwritten if it exists)

    The file has the header Input, Prediction, Target followed by one row
    per word. The <SOW>/<EOW> markers are removed from all three strings.

    """

    list_of_words = []

    for i in range(len(dataset)):
        # Fetch the sample once; every access builds new tensors.
        sample = dataset[i]

        char_input = [dataset.vocab_eng.itos[index] for index in sample[0].tolist()]
        actual_word = [dataset.vocab_hin.itos[index] for index in sample[1].tolist()]

        prediction = predict(model, char_input, cell_type)

        # A prediction that ran into the length limit has no <EOW> at the
        # end; in that case its last token is a real character and is kept.
        if prediction[-1] == "<EOW>":
            pred_chars = prediction[1:-1]
        else:
            pred_chars = prediction[1:]

        list_of_words.append([''.join(char_input[1:-1]),
                              ''.join(pred_chars),
                              ''.join(actual_word[1:-1])])

    with open(file_name, mode='w', encoding='utf-8', newline='') as file:
        writer = csv.writer(file)

        # Write header row
        writer.writerow(["Input", "Prediction", "Target"])
        writer.writerows(list_of_words)
            

In [12]:
def print_prediction(dataset, model, cell_type, num_samples=16):

    """
    Print a table with the input, the prediction and the target of the
    first words of the dataset.

    dataset: dataset whose words are predicted
    model: Trained model
    cell_type: forwarded to predict()
    num_samples: number of words to show; capped at the dataset size

    """

    # For better looks of printed elements
    table = PrettyTable()
    table.field_names = ["Input", "Prediction", "Target"]

    for i in range(min(num_samples, len(dataset))):
        # Fetch the sample once; every access builds new tensors.
        sample = dataset[i]

        # changing the word into list of characters.
        char_input = [dataset.vocab_eng.itos[index] for index in sample[0].tolist()]

        # Preparing target word
        actual_word = [dataset.vocab_hin.itos[index] for index in sample[1].tolist()]

        # getting the prediction using the predict function
        prediction = predict(model, char_input, cell_type)

        # A prediction that ran into the length limit has no <EOW> at the
        # end; in that case its last token is a real character and is kept.
        if prediction[-1] == "<EOW>":
            pred_chars = prediction[1:-1]
        else:
            pred_chars = prediction[1:]

        # Rows go straight into the table. The earlier DataFrame.append
        # detour no longer works: that method was removed in pandas 2.0.
        table.add_row([''.join(char_input[1:-1]),
                       ''.join(pred_chars),
                       ''.join(actual_word[1:-1])])

    print(table)
        


In [15]:
def train(num_epochs, learning_rate, batch_size, load_model, 
         input_size_encoder, input_size_decoder, output_size,
         encoder_embedding_size, decoder_embedding_size,
         hidden_size, num_layers, enc_dropout, de_dropout):

    """
    Train the Seq2Seq model manually (without wandb).

    num_epochs: number of passes over the training data
    learning_rate: learning rate of the Adam optimizer
    batch_size: accepted for the callers' signature; the batches come from
                the global train_loader
    load_model: if True, restore model and optimizer from 'my_checkpoint.pth.ptar'
    input_size_encoder / input_size_decoder: vocabulary sizes of the two embeddings
    output_size: number of scores the decoder produces per step
    encoder_embedding_size / decoder_embedding_size: embedding dimensions
    hidden_size, num_layers: size and depth of the recurrent cells
    enc_dropout / de_dropout: dropout of the encoder / decoder

    The function also reads the globals cell_type, bi_directional, device,
    train_loader, train_data and valid_data. After every epoch it prints the
    loss of the last batch and the train / validation accuracy.

    To test the model on the test dataset, to generate the
    predictions_vanilla csv file or to print some predictions, uncomment
    the relevant commented lines at the end of the function.

    """

    # Building the encoder
    encoder_net = Encoder(input_size_encoder, encoder_embedding_size,
                         hidden_size, num_layers, enc_dropout, cell_type,
                          bi_directional).to(device)

    # Building the decoder. The dropout comes from the de_dropout parameter;
    # reading a global named dec_dropout here would ignore the argument.
    decoder_net = Decoder(input_size_decoder, decoder_embedding_size,
                         hidden_size, output_size, num_layers, de_dropout, 
                          cell_type ,bi_directional).to(device)

    # Preparing the model
    model = Seq2Seq(encoder_net, decoder_net, cell_type).to(device)

    # Optimizer and loss function; padded positions do not contribute to the loss
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    pad_index = 0
    criterion = nn.CrossEntropyLoss(ignore_index=pad_index)

    if load_model:
        load_checkpoint(torch.load('my_checkpoint.pth.ptar'),model, optimizer)

    print("Training the model...")
    for epoch in range(num_epochs):
        print('Epoch: ', epoch+1, '/', num_epochs)

        for batch_idx, (inputs, targets) in enumerate(train_loader):

            input_word = inputs.to(device)
            target_word = targets.to(device)

            output = model(input_word, target_word)
            # output shape: (target_len, batch_size, output_vocab_size)

            # Drop row 0 (the start token) and flatten for the loss
            output = output[1:].reshape(-1, output.shape[2])
            target_word = target_word[1:].reshape(-1)

            optimizer.zero_grad()
            loss = criterion(output, target_word)

            loss.backward()

            # To handle large gradients:
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)
            optimizer.step()

        # Loss of the last batch of the epoch
        print("Training Loss: ", loss.item())
        model.eval()
        print("Finding accuracy of the model...")
        train_accu =  accuracy(train_data, model, cell_type)
        valid_accu =  accuracy(valid_data, model, cell_type)
        model.train()

        print("valid accuracy: ", valid_accu)
        print("train accuracy: ", train_accu)


    ##=> For testing the model uncomment the code
    #model.eval()        
    #test_accur =  accuracy(test_data, model, cell_type) 

    ##=> Uncomment  prediction_csv only if you want to generate the csv file of predictions
    #prediction_csv(test_data, model, cell_type)  
    #model.train()  

#     print("Printing the predictions of Input words:")
#     print()
#     model.eval()
#     print_prediction(test_data, model, cell_type)
#     model.train()    
    
    


In [16]:
# Data loading
# Change root_dir to the location of the dataset on your machine.
# out_lang: the output language of the transliteration,
# e.g. 'hin': Hindi, 'urd': Urdu, 'tel': Telugu.
root_dir = r'C:\Users\HICLIPS-ASK\aksharantar_sampled'
out_lang = 'hin'
batch_size = 64
# get_loader returns (DataLoader, dataset) for the requested split.
train_loader, train_data = get_loader(root_dir, out_lang, 'train', batch_size=batch_size, pin_memory=True )
valid_loader, valid_data = get_loader(root_dir, out_lang, 'valid', batch_size=batch_size, pin_memory=True)
test_loader, test_data = get_loader(root_dir, out_lang, 'test', batch_size=batch_size, pin_memory=True)

In [17]:
#wandb_sweep = False

In [47]:
# Manual training run (no wandb sweep).
# In a notebook, uncomment the 'wandb_sweep = False' line above to force it.
if not wandb_sweep:
    # num_epochs, hidden_size, num_layers, embedding_size, cell_type and
    # bi_directional are the values parsed from the command line. They are
    # deliberately not reassigned here, otherwise the command-line options
    # would have no effect. Their defaults are 1, 256, 2, 256, 'lstm', True.
    learning_rate = 0.001
    load_model = False
    input_size_encoder = len(train_data.vocab_eng)
    input_size_decoder = len(train_data.vocab_hin)
    output_size = len(train_data.vocab_hin)
    encoder_embedding_size = embedding_size
    decoder_embedding_size = embedding_size
    enc_dropout = 0.2
    dec_dropout = 0.2

    ## Training the model
    train(num_epochs, learning_rate, batch_size, load_model, 
             input_size_encoder, input_size_decoder, output_size,
             encoder_embedding_size, decoder_embedding_size,
             hidden_size, num_layers, enc_dropout, dec_dropout)

Epoch:  1 / 10
Training Loss:  0.8664389848709106
Epoch:  2 / 10
Training Loss:  0.7912437319755554
Epoch:  3 / 10
Training Loss:  0.5755021572113037
Epoch:  4 / 10
Training Loss:  0.8873854875564575
Epoch:  5 / 10
Training Loss:  0.6402363181114197
Epoch:  6 / 10
Training Loss:  0.5890380144119263
Epoch:  7 / 10
Training Loss:  0.44841286540031433
Epoch:  8 / 10
Training Loss:  0.4370160698890686
Epoch:  9 / 10
Training Loss:  0.480361670255661
Epoch:  10 / 10
Training Loss:  0.3889986276626587
Printing the predictions of Input words:

+--------------+------------+----------+
|    Input     | Prediction |  Target  |
+--------------+------------+----------+
|   thermax    |   थर्माक्स   |  थरमैक्स   |
|  sikhaaega   |  सिखाएगा   | सिखाएगा  |
|    learn     |   लीअर्न    |   लर्न    |
|   twitters   |   ट्विटर्स   |  ट्विटर्स  |
| tirunelveli  |  तिरुनेलवेली  | तिरुनेलवेली |
| independence |   इंडेपेंडेंस    |  इंडिपेंडेंस  |
|  speshiyon   |   स्पेशियों   |  स्पेशियों  |
|   shurooh   

## Train with Wandb

In [20]:
# Weights & Biases project name; train_with_wandb passes it to wandb.init.
# NOTE(review): these two names are hard-coded and independent of the
# --wandb_project / --wandb_entity command-line options parsed earlier in
# the file - confirm which pair is meant to be used.
project_name = "Assignment 3"
entity_name = "am22s020"
import wandb

In [21]:
def train_with_wandb():
    """
    Train the Seq2Seq model for one wandb run (also used as the sweep function).

    The hyperparameters are read from wandb.config; config_defaults supplies
    the values used when the run is not started by a sweep. The function
    reads the globals project_name, device, train_loader, train_data and
    valid_data.

    The training loss of the last batch is logged after every epoch. On
    every second epoch the train and validation accuracy are logged as well.
    The validation accuracy is logged under "valid accuracy" and also under
    "valid_accu", the metric name the sweep configuration optimises.

    """

    config_defaults = {"cell_type": "lstm",
                       "num_layers": 4,
                       "hidden_size": 256,
                       "num_epochs":10,
                       "bi_dir":True,
                       "dropout": 0.2,
                       "embed_size":256
                      } 

    wandb.init(config=config_defaults, project=project_name, resume=False)

    config = wandb.config 

    learning_rate = 0.001
    load_model = False
    num_epochs = config.num_epochs
    encoder_embedding_size = config.embed_size
    decoder_embedding_size = config.embed_size
    input_size_encoder = len(train_data.vocab_eng)
    input_size_decoder = len(train_data.vocab_hin)
    output_size = len(train_data.vocab_hin)
    hidden_size = config.hidden_size
    num_layers = config.num_layers
    enc_dropout = config.dropout
    dec_dropout = config.dropout
    cell_type = config.cell_type
    bi_directional = config.bi_dir
    Test = True

    # Encode the configuration in the run name
    wandb.run.name  = "test_{}_cell_{}_nl_{}_hs_{}_e_{}_bd_{}_dr_{}_ems_{}".format(Test, cell_type,
                                                                          num_layers,
                                                                          hidden_size,
                                                                          num_epochs,
                                                                          bi_directional,
                                                                          enc_dropout,
                                                                          encoder_embedding_size
                                                                          )

    print(wandb.run.name )

    encoder_net = Encoder(input_size_encoder, encoder_embedding_size,
                         hidden_size, num_layers, enc_dropout, cell_type,
                          bi_directional).to(device)

    decoder_net = Decoder(input_size_decoder, decoder_embedding_size,
                         hidden_size, output_size, num_layers, dec_dropout, 
                          cell_type ,bi_directional).to(device)

    model = Seq2Seq(encoder_net, decoder_net, cell_type).to(device)

    # Padded positions do not contribute to the loss
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    pad_index = 0
    criterion = nn.CrossEntropyLoss(ignore_index=pad_index)

    if load_model:
        load_checkpoint(torch.load('my_checkpoint.pth.ptar'),model, optimizer)

    print("Training the model.....")
    for epoch in range(num_epochs):
        print('Epoch: ', epoch+1, '/', num_epochs)

        for batch_idx, (inputs, targets) in enumerate(train_loader):

            input_word = inputs.to(device)
            target_word = targets.to(device)

            output = model(input_word, target_word)
            # output shape: (target_len, batch_size, output_vocab_size)

            # Drop row 0 (the start token) and flatten for the loss
            output = output[1:].reshape(-1, output.shape[2])
            target_word = target_word[1:].reshape(-1)

            optimizer.zero_grad()
            loss = criterion(output, target_word)

            loss.backward()

            # To handle large gradients:
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)
            optimizer.step()

        print("Training Loss: ", loss.item())

        # Loss of the last batch of the epoch
        train_loss = loss.item()
        if (epoch+1)%2 == 0:
            # The accuracy is computed word by word, so it is only done
            # on every second epoch.
            model.eval()
            print("Finding accuracy of the model.....")
            train_accu =  accuracy(train_data, model, cell_type)
            valid_accu =  accuracy(valid_data, model, cell_type)
            model.train()

            # "valid_accu" is the metric name the sweep configuration
            # optimises; without this key the sweep never sees its metric.
            wandb.log({"valid accuracy": valid_accu, "valid_accu": valid_accu,
                        "train accuracy": train_accu,
                        "train loss": train_loss, 'epoch': epoch+1})
        else:
            wandb.log({"train loss": train_loss, 'epoch': epoch+1}) 

    ## For testing the model uncomment the code
#     model.eval()        
#     test_accur =  accuracy(test_data, model, cell_type) 

     # Uncomment  prediction_csv only if you want to generate the csv file of predictions
#     prediction_csv(test_data, model, cell_type)  
#     model.train()                                            

#     wandb.log({"test accuracy": test_accur})

    wandb.run.finish()
    

In [18]:
#train_with_wandb()

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train loss,█▅▆▃▃▄▂▂▂▁

0,1
epoch,10.0
train loss,0.25989


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016933333332902596, max=1.0…

test_True_cell_lstm_nl_4_hs_256_e_10_bd_True_dr_0.2_ems_256
Epoch:  1 / 10
Training Loss:  0.8337882161140442
Epoch:  2 / 10
Training Loss:  0.8129757642745972
Epoch:  3 / 10
Training Loss:  0.6103125810623169
Epoch:  4 / 10
Training Loss:  0.4861854910850525
Epoch:  5 / 10
Training Loss:  0.5185434222221375
Epoch:  6 / 10
Training Loss:  0.4606669247150421
Epoch:  7 / 10
Training Loss:  0.4174833297729492
Epoch:  8 / 10
Training Loss:  0.544682502746582
Epoch:  9 / 10
Training Loss:  0.36857840418815613
Epoch:  10 / 10
Training Loss:  0.27932241559028625


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
test accuracy,▁
train loss,██▅▄▄▃▃▄▂▁

0,1
epoch,10.0
test accuracy,39.0625
train loss,0.27932


In [22]:
# Search space for the wandb sweep. Each hyperparameter name maps to
# {"values": [...]}, i.e. the discrete candidates a sweep run may pick from.
hyperparameters = {
    name: {"values": candidates}
    for name, candidates in {
        "num_layers": [2, 3, 4],
        "hidden_size": [64, 128, 256],
        "cell_type": ["rnn", "gru", "lstm"],
        "num_epochs": [10, 15, 20],
        "bi_dir": [False, True],
        "dropout": [0.2, 0.3, 0.5],
        "embed_size": [64, 128, 256],
    }.items()
}


In [23]:
def wandb_sweep(project_name, entity_name):
    '''
    Create a wandb hyperparameter sweep and start an agent that runs it.

    A Bayesian-search sweep is registered over the module-level
    `hyperparameters` search space, and `train_with_wandb` is handed to the
    wandb agent as the function to execute for the sweep's runs.

    Args:
        project_name: name of the wandb project in which the sweep is created.
        entity_name: wandb entity (user or team) that owns the project.

    Returns:
        None.
    '''
    sweep_config = {
        "method": "bayes",
        "metric": {
            # This must be exactly the key that training passes to wandb.log
            # ("valid accuracy"); with any other name the Bayesian search has
            # no logged value to optimise.
            "name": "valid accuracy",
            "goal": "maximize",
        },
        "parameters": hyperparameters,
    }

    sweep_id = wandb.sweep(sweep_config, project=project_name, entity=entity_name)
    wandb.agent(sweep_id, train_with_wandb)

In [23]:
# Start the hyperparameter sweep only when a sweep was requested.
# NOTE(review): when this file runs top to bottom, the name `wandb_sweep` is
# bound to the function defined above, so comparing it with True can never
# succeed. Presumably the parsed --wandb_sweep command-line flag was meant
# here; confirm, and give the flag and the function distinct names.
if wandb_sweep == True:
    wandb_sweep(project_name, entity_name)

Create sweep with ID: a7mxe1u0
Sweep URL: https://wandb.ai/am22s020/Assignment%203/sweeps/a7mxe1u0


[34m[1mwandb[0m: Agent Starting Run: mdrvba44 with config:
[34m[1mwandb[0m: 	bi_dir: False
[34m[1mwandb[0m: 	cell_type: rnn
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embed_size: 64
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	num_epochs: 20
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: Currently logged in as: [33mam22s020[0m. Use [1m`wandb login --relogin`[0m to force relogin


cell_rnn_nl_3_hs_256_e_20_bd_False_dr_0.3_ems_64
Epoch:  1 / 20
Training Loss:  3.0478715896606445
Epoch:  2 / 20
Training Loss:  2.8362088203430176
Epoch:  3 / 20
Training Loss:  3.0664327144622803
Epoch:  4 / 20
Training Loss:  2.8907670974731445
Epoch:  5 / 20
Training Loss:  2.9632322788238525
Epoch:  6 / 20
Training Loss:  2.850958824157715
Epoch:  7 / 20
Training Loss:  2.7554028034210205
Epoch:  8 / 20
Training Loss:  2.886122703552246
Epoch:  9 / 20
Training Loss:  2.846931219100952
Epoch:  10 / 20
Training Loss:  2.8242266178131104
Epoch:  11 / 20
Training Loss:  2.814845323562622
Epoch:  12 / 20
Training Loss:  2.8845555782318115
Epoch:  13 / 20
Training Loss:  2.862987518310547


VBox(children=(Label(value='0.000 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.0, max…

0,1
epoch,▁▂▂▃▄▄▅▅▆▇▇█
train accuracy,▁▁▁▁▁▁▁▁▁▁▁▁
train loss,█▃█▄▆▃▁▄▃▃▂▄
valid accuracy,▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,12.0
train accuracy,0.0
train loss,2.88456
valid accuracy,0.0


[34m[1mwandb[0m: Agent Starting Run: 0hassjm2 with config:
[34m[1mwandb[0m: 	bi_dir: True
[34m[1mwandb[0m: 	cell_type: lstm
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_size: 256
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	num_epochs: 10
[34m[1mwandb[0m: 	num_layers: 4


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016666666666666666, max=1.0…

cell_lstm_nl_4_hs_128_e_10_bd_True_dr_0.2_ems_256
Epoch:  1 / 10
Training Loss:  1.2406724691390991
Epoch:  2 / 10
Training Loss:  1.0732945203781128
Epoch:  3 / 10
Training Loss:  0.6862814426422119
Epoch:  4 / 10
Training Loss:  0.6769680380821228
Epoch:  5 / 10
Training Loss:  0.850723385810852
Epoch:  6 / 10
Training Loss:  0.8917537927627563
Epoch:  7 / 10
Training Loss:  0.6394501328468323
Epoch:  8 / 10
Training Loss:  0.661053478717804
Epoch:  9 / 10
Training Loss:  0.43119263648986816
Epoch:  10 / 10
Training Loss:  0.7015020847320557


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train accuracy,▁▃▄▅▆▆▇▇██
train loss,█▇▃▃▅▅▃▃▁▃
valid accuracy,▁▄▆▆▇▇████

0,1
epoch,10.0
train accuracy,45.51367
train loss,0.7015
valid accuracy,37.03613


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 9snw6td4 with config:
[34m[1mwandb[0m: 	bi_dir: False
[34m[1mwandb[0m: 	cell_type: lstm
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_size: 256
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	num_epochs: 20
[34m[1mwandb[0m: 	num_layers: 2


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016933333333327028, max=1.0…

cell_lstm_nl_2_hs_64_e_20_bd_False_dr_0.2_ems_256
Epoch:  1 / 20
Training Loss:  2.7114460468292236
Epoch:  2 / 20
Training Loss:  2.1377992630004883
Epoch:  3 / 20
Training Loss:  1.8603490591049194
Epoch:  4 / 20
Training Loss:  1.4554076194763184
Epoch:  5 / 20
Training Loss:  1.395287036895752
Epoch:  6 / 20
Training Loss:  1.238165259361267
Epoch:  7 / 20
Training Loss:  1.4008246660232544
Epoch:  8 / 20
Training Loss:  1.1270248889923096
Epoch:  9 / 20
Training Loss:  1.2872295379638672
Epoch:  10 / 20
Training Loss:  1.10170578956604
Epoch:  11 / 20
Training Loss:  0.980254590511322
Epoch:  12 / 20
Training Loss:  0.9743590354919434
Epoch:  13 / 20
Training Loss:  1.0085052251815796
Epoch:  14 / 20
Training Loss:  0.9857277870178223
Epoch:  15 / 20
Training Loss:  1.0388023853302002
Epoch:  16 / 20
Training Loss:  0.9821643829345703
Epoch:  17 / 20
Training Loss:  1.085750937461853
Epoch:  18 / 20
Training Loss:  0.83897465467453
Epoch:  19 / 20
Training Loss:  0.832578003406524

VBox(children=(Label(value='0.000 MB of 0.002 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.0, max…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train accuracy,▁▁▂▂▃▄▄▅▆▆▆▆▇▇▇▇▇▇██
train loss,█▆▅▃▃▃▃▂▃▂▂▂▂▂▂▂▂▁▁▃
valid accuracy,▁▁▂▃▄▅▅▆▇▇▇▇▇██▇▇███

0,1
epoch,20.0
train accuracy,11.75195
train loss,1.25115
valid accuracy,13.94043


[34m[1mwandb[0m: Agent Starting Run: lf0f5ayl with config:
[34m[1mwandb[0m: 	bi_dir: True
[34m[1mwandb[0m: 	cell_type: gru
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_size: 128
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	num_epochs: 20
[34m[1mwandb[0m: 	num_layers: 3


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016666666666666666, max=1.0…

cell_gru_nl_3_hs_256_e_20_bd_True_dr_0.2_ems_128
Epoch:  1 / 20
Training Loss:  1.0315338373184204
Epoch:  2 / 20
Training Loss:  0.7239575982093811
Epoch:  3 / 20
Training Loss:  0.8687513470649719
Epoch:  4 / 20
Training Loss:  0.4988389313220978
Epoch:  5 / 20
Training Loss:  0.5611096024513245
Epoch:  6 / 20
Training Loss:  0.5542365908622742
Epoch:  7 / 20
Training Loss:  0.5384493470191956
Epoch:  8 / 20
Training Loss:  0.791782557964325
Epoch:  9 / 20
Training Loss:  0.5379714369773865
Epoch:  10 / 20
Training Loss:  0.48269346356391907
Epoch:  11 / 20
Training Loss:  0.4925650954246521
Epoch:  12 / 20
Training Loss:  0.3599168360233307
Epoch:  13 / 20
Training Loss:  0.4609505534172058
Epoch:  14 / 20
Training Loss:  0.44660183787345886
Epoch:  15 / 20
Training Loss:  0.34377458691596985
Epoch:  16 / 20
Training Loss:  0.35803118348121643
Epoch:  17 / 20
Training Loss:  0.7098659873008728
Epoch:  18 / 20
Training Loss:  0.5002330541610718
Epoch:  19 / 20
Training Loss:  0.53559

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train accuracy,▁▃▄▅▅▆▆▇▇▇▇▇▇███████
train loss,█▅▆▃▃▃▃▆▃▂▃▁▂▂▁▁▅▃▃▂
valid accuracy,▁▅▆▇▇▇▇████▇███▇████

0,1
epoch,20.0
train accuracy,58.15625
train loss,0.46219
valid accuracy,35.79102


[34m[1mwandb[0m: Agent Starting Run: mqtxyr13 with config:
[34m[1mwandb[0m: 	bi_dir: True
[34m[1mwandb[0m: 	cell_type: gru
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embed_size: 128
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	num_epochs: 15
[34m[1mwandb[0m: 	num_layers: 4


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01693333333338766, max=1.0)…

cell_gru_nl_4_hs_256_e_15_bd_True_dr_0.3_ems_128
Epoch:  1 / 15
Training Loss:  0.8319637775421143
Epoch:  2 / 15
Training Loss:  0.7701839804649353
Epoch:  3 / 15
Training Loss:  0.8049006462097168
Epoch:  4 / 15
Training Loss:  0.5651125311851501
Epoch:  5 / 15
Training Loss:  0.7360684871673584
Epoch:  6 / 15
Training Loss:  0.6167250275611877
Epoch:  7 / 15
Training Loss:  1.0255573987960815
Epoch:  8 / 15
Training Loss:  0.5690497159957886
Epoch:  9 / 15
Training Loss:  0.6525317430496216
Epoch:  10 / 15
Training Loss:  0.6679001450538635
Epoch:  11 / 15
Training Loss:  0.4454616904258728
Epoch:  12 / 15
Training Loss:  0.5680645704269409
Epoch:  13 / 15
Training Loss:  0.6487757563591003
Epoch:  14 / 15
Training Loss:  0.6032973527908325
Epoch:  15 / 15
Training Loss:  0.48512396216392517


VBox(children=(Label(value='0.000 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.0, max…

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
train accuracy,▁▃▅▅▆▇▇▇▇██████
train loss,▆▅▅▂▅▃█▂▃▄▁▂▃▃▁
valid accuracy,▁▃▆▅▇▇▇████████

0,1
epoch,15.0
train accuracy,45.25977
train loss,0.48512
valid accuracy,35.54688


[34m[1mwandb[0m: Agent Starting Run: c1vpffyj with config:
[34m[1mwandb[0m: 	bi_dir: True
[34m[1mwandb[0m: 	cell_type: lstm
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_size: 256
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	num_epochs: 20
[34m[1mwandb[0m: 	num_layers: 4


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016916666666656966, max=1.0…

cell_lstm_nl_4_hs_256_e_20_bd_True_dr_0.2_ems_256
Epoch:  1 / 20
Training Loss:  1.1742550134658813
Epoch:  2 / 20
Training Loss:  0.760231077671051
Epoch:  3 / 20
Training Loss:  0.7279675602912903
Epoch:  4 / 20
Training Loss:  0.5604379177093506
Epoch:  5 / 20
Training Loss:  0.4031890034675598
Epoch:  6 / 20
Training Loss:  0.423007071018219
Epoch:  7 / 20
Training Loss:  0.38270169496536255
Epoch:  8 / 20
Training Loss:  0.3946875035762787
Epoch:  9 / 20
Training Loss:  0.29807034134864807
Epoch:  10 / 20
Training Loss:  0.32540154457092285
Epoch:  11 / 20
Training Loss:  0.5148805379867554
Epoch:  12 / 20
Training Loss:  0.4705757200717926
Epoch:  13 / 20
Training Loss:  0.4529900848865509
Epoch:  14 / 20
Training Loss:  0.43131476640701294
Epoch:  15 / 20
Training Loss:  0.2386656254529953
Epoch:  16 / 20
Training Loss:  0.3397161066532135
Epoch:  17 / 20
Training Loss:  0.28883934020996094
Epoch:  18 / 20
Training Loss:  0.19707071781158447
Epoch:  19 / 20
Training Loss:  0.237

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train accuracy,▁▂▃▄▄▅▅▅▆▆▆▇▇▇▇▇████
train loss,█▅▅▄▃▃▃▃▂▂▄▃▃▃▂▂▂▁▂▁
valid accuracy,▁▄▅▆▇▇██████████████

0,1
epoch,20.0
train accuracy,81.46289
train loss,0.14761
valid accuracy,40.55176


[34m[1mwandb[0m: Agent Starting Run: oma55f5r with config:
[34m[1mwandb[0m: 	bi_dir: True
[34m[1mwandb[0m: 	cell_type: lstm
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	embed_size: 64
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	num_epochs: 10
[34m[1mwandb[0m: 	num_layers: 2


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016666666666666666, max=1.0…

cell_lstm_nl_2_hs_64_e_10_bd_True_dr_0.5_ems_64
Epoch:  1 / 10
Training Loss:  1.9937938451766968
Epoch:  2 / 10
Training Loss:  1.7591896057128906
Epoch:  3 / 10
Training Loss:  1.2852426767349243
Epoch:  4 / 10
Training Loss:  1.2852590084075928
Epoch:  5 / 10
Training Loss:  1.2821885347366333
Epoch:  6 / 10
Training Loss:  1.152632713317871
Epoch:  7 / 10
Training Loss:  1.1045602560043335
Epoch:  8 / 10
Training Loss:  1.0628716945648193
Epoch:  9 / 10
Training Loss:  0.9616380929946899
Epoch:  10 / 10
Training Loss:  1.0727113485336304


VBox(children=(Label(value='0.000 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.0, max…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train accuracy,▁▂▃▄▅▆▆▇▇█
train loss,█▆▃▃▃▂▂▂▁▂
valid accuracy,▁▃▄▅▆▆▆▇██

0,1
epoch,10.0
train accuracy,20.16602
train loss,1.07271
valid accuracy,24.41406


[34m[1mwandb[0m: Agent Starting Run: fc1v6csz with config:
[34m[1mwandb[0m: 	bi_dir: True
[34m[1mwandb[0m: 	cell_type: rnn
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_size: 128
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	num_epochs: 15
[34m[1mwandb[0m: 	num_layers: 4


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016666666666666666, max=1.0…

cell_rnn_nl_4_hs_64_e_15_bd_True_dr_0.2_ems_128
Epoch:  1 / 15
Training Loss:  2.328763723373413
Epoch:  2 / 15
Training Loss:  1.9709051847457886
Epoch:  3 / 15
Training Loss:  1.9200576543807983
Epoch:  4 / 15
Training Loss:  1.961483120918274
Epoch:  5 / 15
Training Loss:  1.6956756114959717
Epoch:  6 / 15
Training Loss:  1.6254582405090332
Epoch:  7 / 15
Training Loss:  1.7069753408432007
Epoch:  8 / 15
Training Loss:  1.4327038526535034
Epoch:  9 / 15
Training Loss:  1.3770476579666138
Epoch:  10 / 15
Training Loss:  1.4056748151779175
Epoch:  11 / 15
Training Loss:  1.3296442031860352
Epoch:  12 / 15
Training Loss:  1.4689027070999146
Epoch:  13 / 15
Training Loss:  1.1586112976074219
Epoch:  14 / 15
Training Loss:  1.3655487298965454
Epoch:  15 / 15
Training Loss:  1.3833591938018799


VBox(children=(Label(value='0.000 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.0, max…

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
train accuracy,▁▁▁▁▂▁▂▂▃▄▄▇▅▆█
train loss,█▆▆▆▄▄▄▃▂▂▂▃▁▂▂
valid accuracy,▁▁▁▁▂▂▂▂▃▄▅█▆▆█

0,1
epoch,15.0
train accuracy,3.75195
train loss,1.38336
valid accuracy,3.85742


[34m[1mwandb[0m: Agent Starting Run: orwgzu9f with config:
[34m[1mwandb[0m: 	bi_dir: True
[34m[1mwandb[0m: 	cell_type: gru
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_size: 256
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	num_epochs: 10
[34m[1mwandb[0m: 	num_layers: 4


cell_gru_nl_4_hs_128_e_10_bd_True_dr_0.2_ems_256
Epoch:  1 / 10
Training Loss:  1.1538264751434326
Epoch:  2 / 10
Training Loss:  0.8379322290420532
Epoch:  3 / 10
Training Loss:  0.6871095895767212
Epoch:  4 / 10
Training Loss:  0.9325300455093384
Epoch:  5 / 10
Training Loss:  0.7006354331970215
Epoch:  6 / 10
Training Loss:  0.7318350672721863
Epoch:  7 / 10
Training Loss:  0.6332029700279236
Epoch:  8 / 10
Training Loss:  0.5876648426055908


[34m[1mwandb[0m: Network error (ConnectionError), entering retry loop.


Epoch:  9 / 10
Training Loss:  0.5344516038894653
Epoch:  10 / 10
Training Loss:  0.4663931429386139


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train accuracy,▁▃▄▅▆▆▇▇▇█
train loss,█▅▃▆▃▄▃▂▂▁
valid accuracy,▁▄▆▆▆▇▇▇██

0,1
epoch,10.0
train accuracy,42.65234
train loss,0.46639
valid accuracy,34.10645


[34m[1mwandb[0m: Agent Starting Run: srmmb8oz with config:
[34m[1mwandb[0m: 	bi_dir: True
[34m[1mwandb[0m: 	cell_type: gru
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_size: 256
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	num_epochs: 20
[34m[1mwandb[0m: 	num_layers: 3


cell_gru_nl_3_hs_64_e_20_bd_True_dr_0.2_ems_256
Epoch:  1 / 20
Training Loss:  1.5755342245101929
Epoch:  2 / 20
Training Loss:  1.2679063081741333
Epoch:  3 / 20
Training Loss:  1.0722676515579224
Epoch:  4 / 20
Training Loss:  1.2267605066299438
Epoch:  5 / 20
Training Loss:  0.7481057643890381
Epoch:  6 / 20
Training Loss:  1.0017156600952148
Epoch:  7 / 20
Training Loss:  0.7943402528762817
Epoch:  8 / 20
Training Loss:  0.9339029788970947
Epoch:  9 / 20
Training Loss:  1.0701096057891846
Epoch:  10 / 20
Training Loss:  0.7303330302238464
Epoch:  11 / 20
Training Loss:  0.7375251054763794
Epoch:  12 / 20
Training Loss:  0.7538200616836548
Epoch:  13 / 20
Training Loss:  1.1178746223449707
Epoch:  14 / 20
Training Loss:  0.8165115714073181
Epoch:  15 / 20
Training Loss:  0.8711275458335876
Epoch:  16 / 20
Training Loss:  0.7784141302108765
Epoch:  17 / 20
Training Loss:  0.5989426970481873
Epoch:  18 / 20
Training Loss:  0.6824040412902832
Epoch:  19 / 20
Training Loss:  0.671461164

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train accuracy,▁▃▄▄▅▅▆▆▆▇▇▇▇▇██████
train loss,█▆▄▆▂▄▂▃▄▂▂▂▅▃▃▂▁▂▂▁
valid accuracy,▁▃▄▆▆▆▆▇▇▇▇▇▇███████

0,1
epoch,20.0
train accuracy,32.98438
train loss,0.60093
valid accuracy,30.88379


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: y23o7qop with config:
[34m[1mwandb[0m: 	bi_dir: False
[34m[1mwandb[0m: 	cell_type: gru
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embed_size: 64
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	num_epochs: 15
[34m[1mwandb[0m: 	num_layers: 4


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01693333333338766, max=1.0)…

cell_gru_nl_4_hs_256_e_15_bd_False_dr_0.3_ems_64
Epoch:  1 / 15
Training Loss:  1.4826678037643433
Epoch:  2 / 15
Training Loss:  1.1584174633026123
Epoch:  3 / 15
Training Loss:  0.8403890132904053
Epoch:  4 / 15
Training Loss:  1.1312596797943115
Epoch:  5 / 15
Training Loss:  0.7296562194824219
Epoch:  6 / 15
Training Loss:  0.6861956119537354
Epoch:  7 / 15
Training Loss:  0.7419924139976501
Epoch:  8 / 15
Training Loss:  0.6498790383338928
Epoch:  9 / 15
Training Loss:  0.7450131773948669
Epoch:  10 / 15
Training Loss:  0.656376838684082
Epoch:  11 / 15
Training Loss:  0.6276252865791321
Epoch:  12 / 15
Training Loss:  0.6139034628868103
Epoch:  13 / 15
Training Loss:  0.7513092160224915
Epoch:  14 / 15
Training Loss:  0.560623288154602
Epoch:  15 / 15
Training Loss:  0.5683655142784119


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
train accuracy,▁▃▄▅▅▆▆▇▇▇▇████
train loss,█▆▃▅▂▂▂▂▂▂▂▁▂▁▁
valid accuracy,▁▃▅▆▅▇▆▇▇▇█████

0,1
epoch,15.0
train accuracy,43.17969
train loss,0.56837
valid accuracy,35.62012


[34m[1mwandb[0m: Agent Starting Run: 37wllc3l with config:
[34m[1mwandb[0m: 	bi_dir: True
[34m[1mwandb[0m: 	cell_type: lstm
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	embed_size: 256
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	num_epochs: 20
[34m[1mwandb[0m: 	num_layers: 4


cell_lstm_nl_4_hs_64_e_20_bd_True_dr_0.5_ems_256
Epoch:  1 / 20
Training Loss:  2.621673107147217
Epoch:  2 / 20
Training Loss:  2.031122922897339
Epoch:  3 / 20
Training Loss:  1.6848623752593994
Epoch:  4 / 20
Training Loss:  1.4482020139694214
Epoch:  5 / 20
Training Loss:  1.2691023349761963
Epoch:  6 / 20
Training Loss:  1.2989557981491089
Epoch:  7 / 20
Training Loss:  1.2238961458206177
Epoch:  8 / 20
Training Loss:  1.0449508428573608
Epoch:  9 / 20
Training Loss:  1.070694088935852
Epoch:  10 / 20
Training Loss:  0.8999118804931641
Epoch:  11 / 20
Training Loss:  1.1086604595184326
Epoch:  12 / 20
Training Loss:  0.8795908093452454
Epoch:  13 / 20
Training Loss:  0.8477621674537659
Epoch:  14 / 20
Training Loss:  1.0488002300262451
Epoch:  15 / 20
Training Loss:  0.944308876991272
Epoch:  16 / 20
Training Loss:  0.795745849609375
Epoch:  17 / 20
Training Loss:  0.8276112675666809
Epoch:  18 / 20
Training Loss:  0.8584467768669128
Epoch:  19 / 20
Training Loss:  0.8635779023170

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train accuracy,▁▁▂▃▄▄▅▅▅▆▆▆▇▇▇▇▇███
train loss,█▆▄▄▃▃▃▂▂▁▂▁▁▂▂▁▁▁▁▁
valid accuracy,▁▁▃▃▄▅▅▆▆▆▇▇▇▇▇▇████

0,1
epoch,20.0
train accuracy,28.8418
train loss,0.81894
valid accuracy,31.90918


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: px879v65 with config:
[34m[1mwandb[0m: 	bi_dir: True
[34m[1mwandb[0m: 	cell_type: rnn
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embed_size: 64
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	num_epochs: 20
[34m[1mwandb[0m: 	num_layers: 4


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016666666666666666, max=1.0…

cell_rnn_nl_4_hs_64_e_20_bd_True_dr_0.3_ems_64
Epoch:  1 / 20
Training Loss:  2.2962074279785156
Epoch:  2 / 20
Training Loss:  2.0648813247680664
Epoch:  3 / 20
Training Loss:  1.97859525680542
Epoch:  4 / 20
Training Loss:  1.8033088445663452
Epoch:  5 / 20
Training Loss:  1.6661126613616943
Epoch:  6 / 20
Training Loss:  1.838710904121399


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▃▅▆█
train accuracy,▅▁▁█▅
train loss,█▅▄▃▁
valid accuracy,▁▁▁▁▁

0,1
epoch,5.0
train accuracy,0.00195
train loss,1.66611
valid accuracy,0.0


[34m[1mwandb[0m: Agent Starting Run: uke5c7fw with config:
[34m[1mwandb[0m: 	bi_dir: True
[34m[1mwandb[0m: 	cell_type: gru
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	embed_size: 128
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	num_epochs: 15
[34m[1mwandb[0m: 	num_layers: 3


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016666666666666666, max=1.0…

cell_gru_nl_3_hs_64_e_15_bd_True_dr_0.5_ems_128
Epoch:  1 / 15
Training Loss:  2.064755916595459
Epoch:  2 / 15
Training Loss:  1.7709590196609497
Epoch:  3 / 15
Training Loss:  1.862329125404358
Epoch:  4 / 15
Training Loss:  1.4062758684158325
Epoch:  5 / 15
Training Loss:  1.3275519609451294
Epoch:  6 / 15
Training Loss:  1.4658101797103882
Epoch:  7 / 15
Training Loss:  1.1509983539581299
Epoch:  8 / 15
Training Loss:  0.9444053769111633
Epoch:  9 / 15
Training Loss:  1.0004630088806152
Epoch:  10 / 15
Training Loss:  1.1278265714645386
Epoch:  11 / 15
Training Loss:  1.2420284748077393
Epoch:  12 / 15
Training Loss:  1.1983771324157715
Epoch:  13 / 15
Training Loss:  1.100089192390442
Epoch:  14 / 15
Training Loss:  1.0170081853866577
Epoch:  15 / 15
Training Loss:  0.9840360879898071


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
train accuracy,▁▂▃▄▅▅▆▆▆▇▇████
train loss,█▆▇▄▃▄▂▁▁▂▃▃▂▁▁
valid accuracy,▁▂▃▅▅▆▆▆▇▇▇████

0,1
epoch,15.0
train accuracy,20.6582
train loss,0.98404
valid accuracy,23.90137


[34m[1mwandb[0m: Agent Starting Run: szoq8hq9 with config:
[34m[1mwandb[0m: 	bi_dir: True
[34m[1mwandb[0m: 	cell_type: rnn
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	embed_size: 256
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	num_epochs: 10
[34m[1mwandb[0m: 	num_layers: 3


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016916666666656966, max=1.0…

cell_rnn_nl_3_hs_256_e_10_bd_True_dr_0.5_ems_256
Epoch:  1 / 10
Training Loss:  1.9392850399017334
Epoch:  2 / 10
Training Loss:  1.835017204284668
Epoch:  3 / 10
Training Loss:  1.805838942527771
Epoch:  4 / 10
Training Loss:  1.539591908454895
Epoch:  5 / 10
Training Loss:  1.6093065738677979
Epoch:  6 / 10
Training Loss:  1.45222806930542
Epoch:  7 / 10
Training Loss:  1.5497193336486816
Epoch:  8 / 10
Training Loss:  1.7995336055755615
Epoch:  9 / 10
Training Loss:  1.4612292051315308
Epoch:  10 / 10
Training Loss:  1.5874401330947876


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train accuracy,▁▁▁▁▂▃▂▃▇█
train loss,█▇▆▂▃▁▂▆▁▃
valid accuracy,▁▁▁▁▁▂▂▂██

0,1
epoch,10.0
train accuracy,4.03711
train loss,1.58744
valid accuracy,5.59082


[34m[1mwandb[0m: Agent Starting Run: uzqbwrk5 with config:
[34m[1mwandb[0m: 	bi_dir: False
[34m[1mwandb[0m: 	cell_type: gru
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embed_size: 256
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	num_epochs: 10
[34m[1mwandb[0m: 	num_layers: 2


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016666666666666666, max=1.0…

cell_gru_nl_2_hs_64_e_10_bd_False_dr_0.3_ems_256
Epoch:  1 / 10
Training Loss:  2.4783942699432373
Epoch:  2 / 10
Training Loss:  1.9863172769546509
Epoch:  3 / 10
Training Loss:  1.7494693994522095
Epoch:  4 / 10
Training Loss:  1.7878997325897217
Epoch:  5 / 10
Training Loss:  1.6628986597061157
Epoch:  6 / 10
Training Loss:  1.5024558305740356
Epoch:  7 / 10
Training Loss:  1.3634305000305176
Epoch:  8 / 10
Training Loss:  1.3287935256958008
Epoch:  9 / 10
Training Loss:  1.259003758430481
Epoch:  10 / 10
Training Loss:  1.1827806234359741


VBox(children=(Label(value='0.000 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.0, max…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train accuracy,▁▁▂▂▄▅▆▇▇█
train loss,█▅▄▄▄▃▂▂▁▁
valid accuracy,▁▂▃▃▅▆▇█▇█

0,1
epoch,10.0
train accuracy,6.52539
train loss,1.18278
valid accuracy,8.20312


[34m[1mwandb[0m: Agent Starting Run: 0xpyc7go with config:
[34m[1mwandb[0m: 	bi_dir: True
[34m[1mwandb[0m: 	cell_type: lstm
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embed_size: 256
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	num_epochs: 10
[34m[1mwandb[0m: 	num_layers: 3


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016666666666666666, max=1.0…

cell_lstm_nl_3_hs_128_e_10_bd_True_dr_0.3_ems_256
Epoch:  1 / 10
Training Loss:  1.167794942855835
Epoch:  2 / 10
Training Loss:  0.9751072525978088
Epoch:  3 / 10
Training Loss:  0.7995004057884216
Epoch:  4 / 10
Training Loss:  0.7830536961555481
Epoch:  5 / 10
Training Loss:  0.7767279148101807
Epoch:  6 / 10
Training Loss:  0.6585379838943481
Epoch:  7 / 10
Training Loss:  0.5837382078170776
Epoch:  8 / 10
Training Loss:  0.4813898503780365
Epoch:  9 / 10
Training Loss:  0.6425133347511292
Epoch:  10 / 10
Training Loss:  0.5724905133247375


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train accuracy,▁▃▄▅▆▆▇▇▇█
train loss,█▆▄▄▄▃▂▁▃▂
valid accuracy,▁▄▅▆▆▇▇▇██

0,1
epoch,10.0
train accuracy,42.90039
train loss,0.57249
valid accuracy,38.23242


[34m[1mwandb[0m: Agent Starting Run: hh3u0xj6 with config:
[34m[1mwandb[0m: 	bi_dir: False
[34m[1mwandb[0m: 	cell_type: lstm
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	embed_size: 64
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	num_epochs: 15
[34m[1mwandb[0m: 	num_layers: 4


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016666666666666666, max=1.0…

cell_lstm_nl_4_hs_64_e_15_bd_False_dr_0.5_ems_64
Epoch:  1 / 15
Training Loss:  3.110419273376465
Epoch:  2 / 15
Training Loss:  2.958446502685547
Epoch:  3 / 15
Training Loss:  2.6890063285827637
Epoch:  4 / 15
Training Loss:  2.5392262935638428
Epoch:  5 / 15
Training Loss:  2.223663568496704
Epoch:  6 / 15
Training Loss:  2.3340296745300293
Epoch:  7 / 15
Training Loss:  1.8728702068328857
Epoch:  8 / 15
Training Loss:  1.8081676959991455
Epoch:  9 / 15
Training Loss:  1.697816252708435
Epoch:  10 / 15
Training Loss:  1.7136175632476807
Epoch:  11 / 15
Training Loss:  1.6111762523651123
Epoch:  12 / 15
Training Loss:  1.5784655809402466
Epoch:  13 / 15
Training Loss:  1.5106918811798096
Epoch:  14 / 15
Training Loss:  1.3682001829147339
Epoch:  15 / 15
Training Loss:  1.7705137729644775


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
train accuracy,▁▁▁▁▁▁▂▂▃▄▅▆▇▇█
train loss,█▇▆▆▄▅▃▃▂▂▂▂▂▁▃
valid accuracy,▁▁▁▁▁▂▂▃▄▅▅▆▇██

0,1
epoch,15.0
train accuracy,9.21094
train loss,1.77051
valid accuracy,14.50195


[34m[1mwandb[0m: Agent Starting Run: 9dy9gsgh with config:
[34m[1mwandb[0m: 	bi_dir: False
[34m[1mwandb[0m: 	cell_type: lstm
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_size: 256
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	num_epochs: 20
[34m[1mwandb[0m: 	num_layers: 2


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016666666666666666, max=1.0…

cell_lstm_nl_2_hs_64_e_20_bd_False_dr_0.2_ems_256
Epoch:  1 / 20
Training Loss:  2.428826093673706
Epoch:  2 / 20
Training Loss:  1.9252842664718628
Epoch:  3 / 20
Training Loss:  1.7067337036132812
Epoch:  4 / 20
Training Loss:  1.2731027603149414
Epoch:  5 / 20
Training Loss:  1.4769644737243652
Epoch:  6 / 20
Training Loss:  1.1457324028015137
Epoch:  7 / 20
Training Loss:  1.1597038507461548
Epoch:  8 / 20
Training Loss:  1.0694091320037842
Epoch:  9 / 20
Training Loss:  1.0640604496002197
Epoch:  10 / 20
Training Loss:  1.141352891921997
Epoch:  11 / 20
Training Loss:  0.9450796246528625
Epoch:  12 / 20
Training Loss:  1.0689184665679932
Epoch:  13 / 20
Training Loss:  0.9095141887664795
Epoch:  14 / 20
Training Loss:  1.056807041168213
Epoch:  15 / 20
Training Loss:  1.0695050954818726
Epoch:  16 / 20
Training Loss:  0.9930692315101624
Epoch:  17 / 20
Training Loss:  1.2608386278152466
Epoch:  18 / 20
Training Loss:  1.0552644729614258
Epoch:  19 / 20
Training Loss:  1.1248536109

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train accuracy,▁▁▂▃▃▄▄▄▅▅▆▆▇▇▇▇▇█▇█
train loss,█▆▅▃▄▂▂▂▂▂▁▂▁▂▂▂▃▂▂▁
valid accuracy,▁▂▂▃▄▅▅▅▆▆▆▆▇▇▇▇█▇▇█

0,1
epoch,20.0
train accuracy,11.84375
train loss,0.8463
valid accuracy,14.08691


[34m[1mwandb[0m: Agent Starting Run: chapb790 with config:
[34m[1mwandb[0m: 	bi_dir: True
[34m[1mwandb[0m: 	cell_type: gru
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_size: 256
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	num_epochs: 20
[34m[1mwandb[0m: 	num_layers: 3


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016933333333145128, max=1.0…

cell_gru_nl_3_hs_128_e_20_bd_True_dr_0.2_ems_256
Epoch:  1 / 20
Training Loss:  0.9862996339797974
Epoch:  2 / 20
Training Loss:  0.8349027633666992
Epoch:  3 / 20
Training Loss:  1.0236483812332153
Epoch:  4 / 20
Training Loss:  0.7254766821861267
Epoch:  5 / 20
Training Loss:  0.678637683391571
Epoch:  6 / 20
Training Loss:  0.6448912024497986
Epoch:  7 / 20
Training Loss:  0.8983640074729919
Epoch:  8 / 20
Training Loss:  0.6177055835723877
Epoch:  9 / 20
Training Loss:  0.5775460004806519
Epoch:  10 / 20
Training Loss:  0.5609771013259888
Epoch:  11 / 20
Training Loss:  0.6935942769050598
Epoch:  12 / 20
Training Loss:  0.5714068412780762
Epoch:  13 / 20
Training Loss:  0.7083931565284729
Epoch:  14 / 20
Training Loss:  0.534706175327301
Epoch:  15 / 20
Training Loss:  0.6059728264808655
Epoch:  16 / 20
Training Loss:  0.5204353332519531
Epoch:  17 / 20
Training Loss:  0.5438492894172668
Epoch:  18 / 20
Training Loss:  0.5746018886566162
Epoch:  19 / 20
Training Loss:  0.5215150713

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train accuracy,▁▃▄▄▅▅▆▆▆▇▇▇▇▇▇█████
train loss,▇▅█▄▃▃▆▂▂▂▃▂▄▁▂▁▁▂▁▅
valid accuracy,▁▄▅▅▆▆▇▇▇█▇██▇█▇████

0,1
epoch,20.0
train accuracy,48.68555
train loss,0.80242
valid accuracy,35.9375


[34m[1mwandb[0m: Agent Starting Run: 0j0a9p5h with config:
[34m[1mwandb[0m: 	bi_dir: True
[34m[1mwandb[0m: 	cell_type: rnn
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embed_size: 256
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	num_epochs: 20
[34m[1mwandb[0m: 	num_layers: 2


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01693333333338766, max=1.0)…

cell_rnn_nl_2_hs_128_e_20_bd_True_dr_0.3_ems_256
Epoch:  1 / 20
Training Loss:  2.147390365600586
Epoch:  2 / 20
Training Loss:  1.9807170629501343
Epoch:  3 / 20
Training Loss:  1.8705298900604248
Epoch:  4 / 20
Training Loss:  1.752254843711853
Epoch:  5 / 20
Training Loss:  1.692618489265442
Epoch:  6 / 20
Training Loss:  1.7571474313735962
Epoch:  7 / 20
Training Loss:  1.7400479316711426
Epoch:  8 / 20
Training Loss:  1.915268898010254
Epoch:  9 / 20
Training Loss:  1.4902541637420654
Epoch:  10 / 20
Training Loss:  1.580213189125061
Epoch:  11 / 20
Training Loss:  1.6333351135253906
Epoch:  12 / 20
Training Loss:  1.5197333097457886
Epoch:  13 / 20
Training Loss:  1.5924670696258545
Epoch:  14 / 20
Training Loss:  1.3283854722976685
Epoch:  15 / 20
Training Loss:  1.641528606414795
Epoch:  16 / 20
Training Loss:  1.5699204206466675
Epoch:  17 / 20
Training Loss:  1.3742446899414062
Epoch:  18 / 20
Training Loss:  1.6339573860168457
Epoch:  19 / 20
Training Loss:  1.37605702877044

VBox(children=(Label(value='0.000 MB of 0.002 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.0, max…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train accuracy,▁▁▁▁▁▁▃▂▂▃▂▅▅▆▇▇▇▇█▇
train loss,█▇▆▅▄▅▅▆▂▃▄▃▃▁▄▃▁▄▁▃
valid accuracy,▁▁▁▁▁▁▃▂▂▃▂▅▅▆▇▆▇▆█▆

0,1
epoch,20.0
train accuracy,3.33789
train loss,1.54613
valid accuracy,5.41992


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: wfen4k3q with config:
[34m[1mwandb[0m: 	bi_dir: True
[34m[1mwandb[0m: 	cell_type: lstm
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embed_size: 128
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	num_epochs: 15
[34m[1mwandb[0m: 	num_layers: 2


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016666666666666666, max=1.0…

cell_lstm_nl_2_hs_64_e_15_bd_True_dr_0.3_ems_128
Epoch:  1 / 15
Training Loss:  1.6141103506088257
Epoch:  2 / 15
Training Loss:  1.4492719173431396
Epoch:  3 / 15
Training Loss:  1.3724950551986694
Epoch:  4 / 15
Training Loss:  1.2256454229354858
Epoch:  5 / 15
Training Loss:  0.9794718623161316
Epoch:  6 / 15
Training Loss:  0.8787771463394165
Epoch:  7 / 15
Training Loss:  0.8812308311462402
Epoch:  8 / 15
Training Loss:  0.8783848285675049
Epoch:  9 / 15
Training Loss:  0.9720834493637085
Epoch:  10 / 15
Training Loss:  0.8174080848693848
Epoch:  11 / 15
Training Loss:  1.027345895767212
Epoch:  12 / 15
Training Loss:  0.8966261148452759
Epoch:  13 / 15
Training Loss:  0.8414806127548218
Epoch:  14 / 15
Training Loss:  0.8007476329803467
Epoch:  15 / 15
Training Loss:  0.5519066452980042


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
train accuracy,▁▃▄▄▅▆▆▆▇▇▇████
train loss,█▇▆▅▄▃▃▃▄▃▄▃▃▃▁
valid accuracy,▁▃▄▅▅▆▇▇▇▇█████

0,1
epoch,15.0
train accuracy,29.77734
train loss,0.55191
valid accuracy,30.59082


[34m[1mwandb[0m: Agent Starting Run: lqjhl2bd with config:
[34m[1mwandb[0m: 	bi_dir: True
[34m[1mwandb[0m: 	cell_type: lstm
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	embed_size: 64
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	num_epochs: 20
[34m[1mwandb[0m: 	num_layers: 2


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016933333333145128, max=1.0…

cell_lstm_nl_2_hs_256_e_20_bd_True_dr_0.5_ems_64
Epoch:  1 / 20
Training Loss:  1.2435314655303955
Epoch:  2 / 20
Training Loss:  0.8525512218475342
Epoch:  3 / 20
Training Loss:  0.7842576503753662
Epoch:  4 / 20
Training Loss:  0.856486976146698
Epoch:  5 / 20
Training Loss:  0.7052022218704224
Epoch:  6 / 20
Training Loss:  0.9154533743858337
Epoch:  7 / 20
Training Loss:  0.5245258808135986
Epoch:  8 / 20
Training Loss:  0.6371005773544312
Epoch:  9 / 20
Training Loss:  0.5394026637077332
Epoch:  10 / 20
Training Loss:  0.4653800427913666
Epoch:  11 / 20
Training Loss:  0.6738278269767761
Epoch:  12 / 20
Training Loss:  0.43926939368247986
Epoch:  13 / 20
Training Loss:  0.5226016044616699
Epoch:  14 / 20
Training Loss:  0.5842046141624451
Epoch:  15 / 20
Training Loss:  0.6583431959152222
Epoch:  16 / 20
Training Loss:  0.5222588777542114
Epoch:  17 / 20
Training Loss:  0.3678765892982483
Epoch:  18 / 20
Training Loss:  0.24893036484718323
Epoch:  19 / 20
Training Loss:  0.3636111

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train accuracy,▁▂▃▄▄▅▅▅▆▆▆▆▇▇▇▇▇███
train loss,█▅▅▅▄▆▃▄▃▃▄▂▃▃▄▃▂▁▂▂
valid accuracy,▁▄▅▆▆▇▇▇▇▇██████████

0,1
epoch,20.0
train accuracy,62.10352
train loss,0.41602
valid accuracy,39.69727


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 8wxube9p with config:
[34m[1mwandb[0m: 	bi_dir: True
[34m[1mwandb[0m: 	cell_type: gru
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embed_size: 64
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	num_epochs: 10
[34m[1mwandb[0m: 	num_layers: 3


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016666666666666666, max=1.0…

cell_gru_nl_3_hs_64_e_10_bd_True_dr_0.3_ems_64
Epoch:  1 / 10
Training Loss:  1.9137864112854004
Epoch:  2 / 10
Training Loss:  1.2832666635513306
Epoch:  3 / 10
Training Loss:  1.339212417602539
Epoch:  4 / 10
Training Loss:  0.8817259669303894
Epoch:  5 / 10
Training Loss:  0.9067984819412231
Epoch:  6 / 10
Training Loss:  1.0449872016906738
Epoch:  7 / 10
Training Loss:  1.1369551420211792
Epoch:  8 / 10
Training Loss:  0.8459808230400085
Epoch:  9 / 10
Training Loss:  0.8901092410087585
Epoch:  10 / 10
Training Loss:  0.7559152245521545


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train accuracy,▁▂▄▅▆▇▇▇██
train loss,█▄▅▂▂▃▃▂▂▁
valid accuracy,▁▃▅▆▆▇▇▇██

0,1
epoch,10.0
train accuracy,25.18359
train loss,0.75592
valid accuracy,28.07617


[34m[1mwandb[0m: Agent Starting Run: hpzxbfx1 with config:
[34m[1mwandb[0m: 	bi_dir: False
[34m[1mwandb[0m: 	cell_type: rnn
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_size: 128
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	num_epochs: 15
[34m[1mwandb[0m: 	num_layers: 2


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01693333333338766, max=1.0)…

cell_rnn_nl_2_hs_128_e_15_bd_False_dr_0.2_ems_128
Epoch:  1 / 15
Training Loss:  2.906339168548584
Epoch:  2 / 15
Training Loss:  3.0564560890197754
Epoch:  3 / 15
Training Loss:  3.056919574737549
Epoch:  4 / 15
Training Loss:  2.820321559906006
Epoch:  5 / 15
Training Loss:  3.0272915363311768
Epoch:  6 / 15
Training Loss:  2.8388559818267822
Epoch:  7 / 15
Training Loss:  2.7145602703094482
Epoch:  8 / 15
Training Loss:  3.029923677444458
Epoch:  9 / 15
Training Loss:  2.812329053878784
Epoch:  10 / 15
Training Loss:  2.8527867794036865
Epoch:  11 / 15
Training Loss:  2.7460644245147705
Epoch:  12 / 15
Training Loss:  2.836313486099243
Epoch:  13 / 15
Training Loss:  3.1494362354278564
Epoch:  14 / 15
Training Loss:  2.952622652053833
Epoch:  15 / 15
Training Loss:  2.8214633464813232


VBox(children=(Label(value='0.000 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.0, max…

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
train accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train loss,▄▇▇▃▆▃▁▆▃▃▂▃█▅▃
valid accuracy,▁▁█▁█▁▁█▁▁▁▁▁▁▁

0,1
epoch,15.0
train accuracy,0.0
train loss,2.82146
valid accuracy,0.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: h9zh679i with config:
[34m[1mwandb[0m: 	bi_dir: False
[34m[1mwandb[0m: 	cell_type: rnn
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_size: 64
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	num_epochs: 15
[34m[1mwandb[0m: 	num_layers: 2


cell_rnn_nl_2_hs_256_e_15_bd_False_dr_0.2_ems_64
Epoch:  1 / 15
Training Loss:  2.859100580215454
Epoch:  2 / 15
Training Loss:  2.8683297634124756
Epoch:  3 / 15
Training Loss:  2.858440637588501
Epoch:  4 / 15
Training Loss:  2.9910736083984375
Epoch:  5 / 15
Training Loss:  3.2265093326568604
Epoch:  6 / 15
Training Loss:  2.8445451259613037
Epoch:  7 / 15
Training Loss:  2.8930065631866455
Epoch:  8 / 15
Training Loss:  2.661224365234375
Epoch:  9 / 15
Training Loss:  2.882998466491699
Epoch:  10 / 15
Training Loss:  2.751862049102783
Epoch:  11 / 15
Training Loss:  2.7757656574249268
Epoch:  12 / 15
Training Loss:  2.804786443710327
Epoch:  13 / 15
Training Loss:  2.9659037590026855
Epoch:  14 / 15
Training Loss:  2.733224391937256
Epoch:  15 / 15
Training Loss:  2.6718266010284424


0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
train accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train loss,▃▄▃▅█▃▄▁▄▂▂▃▅▂▁
valid accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,15.0
train accuracy,0.0
train loss,2.67183
valid accuracy,0.0


[34m[1mwandb[0m: Agent Starting Run: 665y5chs with config:
[34m[1mwandb[0m: 	bi_dir: True
[34m[1mwandb[0m: 	cell_type: gru
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	embed_size: 128
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	num_epochs: 15
[34m[1mwandb[0m: 	num_layers: 4


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016916666666899498, max=1.0…

cell_gru_nl_4_hs_64_e_15_bd_True_dr_0.5_ems_128
Epoch:  1 / 15
Training Loss:  2.123441696166992
Epoch:  2 / 15
Training Loss:  1.6293542385101318
Epoch:  3 / 15
Training Loss:  1.2994298934936523
Epoch:  4 / 15
Training Loss:  1.1630849838256836
Epoch:  5 / 15
Training Loss:  1.2032283544540405
Epoch:  6 / 15
Training Loss:  1.3733172416687012
Epoch:  7 / 15
Training Loss:  1.2392295598983765
Epoch:  8 / 15
Training Loss:  1.3665441274642944
Epoch:  9 / 15
Training Loss:  0.9769058227539062
Epoch:  10 / 15
Training Loss:  0.8990507125854492
Epoch:  11 / 15
Training Loss:  0.8930866122245789
Epoch:  12 / 15
Training Loss:  1.1463818550109863
Epoch:  13 / 15
Training Loss:  0.8204144835472107
Epoch:  14 / 15
Training Loss:  0.9367572069168091
Epoch:  15 / 15
Training Loss:  0.9828773140907288


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
train accuracy,▁▂▃▄▅▅▆▆▆▇▇▇█▇█
train loss,█▅▄▃▃▄▃▄▂▁▁▃▁▂▂
valid accuracy,▁▂▄▄▅▆▆▆▆▇▇▇███

0,1
epoch,15.0
train accuracy,21.375
train loss,0.98288
valid accuracy,23.07129


[34m[1mwandb[0m: Agent Starting Run: i3dn0dao with config:
[34m[1mwandb[0m: 	bi_dir: False
[34m[1mwandb[0m: 	cell_type: gru
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_size: 128
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	num_epochs: 10
[34m[1mwandb[0m: 	num_layers: 3


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01693333333338766, max=1.0)…

cell_gru_nl_3_hs_256_e_10_bd_False_dr_0.2_ems_128
Epoch:  1 / 10
Training Loss:  1.14601731300354
Epoch:  2 / 10
Training Loss:  0.8676307201385498
Epoch:  3 / 10
Training Loss:  0.6384613513946533
Epoch:  4 / 10
Training Loss:  0.6021820306777954
Epoch:  5 / 10
Training Loss:  0.6409071087837219
Epoch:  6 / 10
Training Loss:  0.6644165515899658
Epoch:  7 / 10
Training Loss:  0.612616240978241
Epoch:  8 / 10
Training Loss:  0.7411858439445496
Epoch:  9 / 10
Training Loss:  0.7358834743499756
Epoch:  10 / 10
Training Loss:  0.5619580745697021


VBox(children=(Label(value='0.000 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.0, max…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train accuracy,▁▃▄▅▆▇▇▇██
train loss,█▅▂▁▂▂▂▃▃▁
valid accuracy,▁▄▆▅▇▇▇▇█▇

0,1
epoch,10.0
train accuracy,42.76367
train loss,0.56196
valid accuracy,31.66504


[34m[1mwandb[0m: Agent Starting Run: m0y1znix with config:
[34m[1mwandb[0m: 	bi_dir: True
[34m[1mwandb[0m: 	cell_type: gru
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_size: 128
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	num_epochs: 15
[34m[1mwandb[0m: 	num_layers: 2


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016916666666414434, max=1.0…

cell_gru_nl_2_hs_64_e_15_bd_True_dr_0.2_ems_128
Epoch:  1 / 15
Training Loss:  1.6803737878799438
Epoch:  2 / 15
Training Loss:  1.4259692430496216
Epoch:  3 / 15
Training Loss:  1.1833443641662598
Epoch:  4 / 15
Training Loss:  1.1335093975067139
Epoch:  5 / 15
Training Loss:  1.1137170791625977
Epoch:  6 / 15
Training Loss:  0.9183403849601746
Epoch:  7 / 15
Training Loss:  1.065302848815918
Epoch:  8 / 15
Training Loss:  0.8142863512039185
Epoch:  9 / 15
Training Loss:  0.8079179525375366
Epoch:  10 / 15
Training Loss:  0.7516563534736633
Epoch:  11 / 15
Training Loss:  0.7957403063774109
Epoch:  12 / 15
Training Loss:  1.095672369003296
Epoch:  13 / 15
Training Loss:  0.9793843030929565
Epoch:  14 / 15
Training Loss:  0.6985803842544556
Epoch:  15 / 15
Training Loss:  0.7513540387153625


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
train accuracy,▁▂▄▅▅▆▆▇▇▇█▇███
train loss,█▆▄▄▄▃▄▂▂▁▂▄▃▁▁
valid accuracy,▁▃▄▅▆▆▇▇▇▇█▇███

0,1
epoch,15.0
train accuracy,25.94531
train loss,0.75135
valid accuracy,25.85449


[34m[1mwandb[0m: Agent Starting Run: p9jjdy5k with config:
[34m[1mwandb[0m: 	bi_dir: True
[34m[1mwandb[0m: 	cell_type: rnn
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embed_size: 64
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	num_epochs: 10
[34m[1mwandb[0m: 	num_layers: 2


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01693333333338766, max=1.0)…

cell_rnn_nl_2_hs_128_e_10_bd_True_dr_0.3_ems_64
Epoch:  1 / 10
Training Loss:  2.2327818870544434
Epoch:  2 / 10
Training Loss:  1.8149869441986084
Epoch:  3 / 10
Training Loss:  1.6540616750717163
Epoch:  4 / 10
Training Loss:  1.6386373043060303
Epoch:  5 / 10
Training Loss:  1.4926972389221191
Epoch:  6 / 10
Training Loss:  1.4533902406692505
Epoch:  7 / 10
Training Loss:  1.3148356676101685
Epoch:  8 / 10
Training Loss:  1.3938720226287842
Epoch:  9 / 10
Training Loss:  1.2117621898651123
Epoch:  10 / 10
Training Loss:  1.1607050895690918


VBox(children=(Label(value='0.000 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.0, max…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train accuracy,▁▁▂▄▅▅█▇▆▆
train loss,█▅▄▄▃▃▂▃▁▁
valid accuracy,▁▂▂▅▆▅█▇▆▄

0,1
epoch,10.0
train accuracy,2.25
train loss,1.16071
valid accuracy,2.05078


[34m[1mwandb[0m: Agent Starting Run: l952iyqf with config:
[34m[1mwandb[0m: 	bi_dir: False
[34m[1mwandb[0m: 	cell_type: rnn
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embed_size: 128
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	num_epochs: 10
[34m[1mwandb[0m: 	num_layers: 3


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016666666666666666, max=1.0…

cell_rnn_nl_3_hs_256_e_10_bd_False_dr_0.3_ems_128
Epoch:  1 / 10
Training Loss:  3.0161850452423096
Epoch:  2 / 10
Training Loss:  2.8725509643554688


0,1
epoch,▁
train accuracy,▁
train loss,▁
valid accuracy,▁

0,1
epoch,1.0
train accuracy,0.0
train loss,3.01619
valid accuracy,0.0


[34m[1mwandb[0m: Agent Starting Run: idi00vj3 with config:
[34m[1mwandb[0m: 	bi_dir: False
[34m[1mwandb[0m: 	cell_type: rnn
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_size: 128
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	num_epochs: 20
[34m[1mwandb[0m: 	num_layers: 2


cell_rnn_nl_2_hs_256_e_20_bd_False_dr_0.2_ems_128
Epoch:  1 / 20
Training Loss:  2.9550511837005615


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

[34m[1mwandb[0m: Agent Starting Run: 592akbfw with config:
[34m[1mwandb[0m: 	bi_dir: False
[34m[1mwandb[0m: 	cell_type: gru
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embed_size: 128
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	num_epochs: 15
[34m[1mwandb[0m: 	num_layers: 3


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016666666666666666, max=1.0…

cell_gru_nl_3_hs_128_e_15_bd_False_dr_0.3_ems_128
Epoch:  1 / 15
Training Loss:  1.6223479509353638
Epoch:  2 / 15
Training Loss:  1.25441575050354
Epoch:  3 / 15
Training Loss:  1.0323632955551147
Epoch:  4 / 15
Training Loss:  1.0873268842697144
Epoch:  5 / 15
Training Loss:  0.8721511363983154
Epoch:  6 / 15
Training Loss:  1.0782721042633057
Epoch:  7 / 15
Training Loss:  0.7993192076683044
Epoch:  8 / 15
Training Loss:  0.894338071346283
Epoch:  9 / 15
Training Loss:  0.6766569018363953
Epoch:  10 / 15
Training Loss:  0.7056446671485901
Epoch:  11 / 15
Training Loss:  0.6642476916313171
Epoch:  12 / 15
Training Loss:  1.0998976230621338
Epoch:  13 / 15
Training Loss:  0.6894945502281189
Epoch:  14 / 15
Training Loss:  0.7424607872962952
Epoch:  15 / 15
Training Loss:  1.0446124076843262


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
train accuracy,▁▃▄▅▆▆▆▆▇▇▇▇▇▇█
train loss,█▅▄▄▃▄▂▃▁▁▁▄▁▂▄
valid accuracy,▁▃▅▆▇▇▇▇███████

0,1
epoch,15.0
train accuracy,30.60352
train loss,1.04461
valid accuracy,29.5166


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: yzx3d5s5 with config:
[34m[1mwandb[0m: 	bi_dir: True
[34m[1mwandb[0m: 	cell_type: rnn
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	embed_size: 64
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	num_epochs: 15
[34m[1mwandb[0m: 	num_layers: 4


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016666666666666666, max=1.0…

cell_rnn_nl_4_hs_64_e_15_bd_True_dr_0.5_ems_64
Epoch:  1 / 15
Training Loss:  2.605907678604126
Epoch:  2 / 15
Training Loss:  2.359367847442627
Epoch:  3 / 15
Training Loss:  2.3133718967437744
Epoch:  4 / 15
Training Loss:  2.2562217712402344
Epoch:  5 / 15
Training Loss:  1.9929231405258179
Epoch:  6 / 15
Training Loss:  1.886419653892517
Epoch:  7 / 15
Training Loss:  2.1648247241973877


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▂▄▅▇█
train accuracy,█▁▁▁▁▆
train loss,█▆▅▅▂▁
valid accuracy,▁▁▁▁▁▁

0,1
epoch,6.0
train accuracy,0.00391
train loss,1.88642
valid accuracy,0.0


[34m[1mwandb[0m: Agent Starting Run: ya27qc51 with config:
[34m[1mwandb[0m: 	bi_dir: True
[34m[1mwandb[0m: 	cell_type: gru
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_size: 256
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	num_epochs: 20
[34m[1mwandb[0m: 	num_layers: 3


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016666666666666666, max=1.0…

cell_gru_nl_3_hs_64_e_20_bd_True_dr_0.2_ems_256
Epoch:  1 / 20
Training Loss:  1.6133095026016235
Epoch:  2 / 20
Training Loss:  1.192320704460144
Epoch:  3 / 20
Training Loss:  1.1070060729980469
Epoch:  4 / 20
Training Loss:  1.229384183883667
Epoch:  5 / 20
Training Loss:  0.9654418230056763
Epoch:  6 / 20
Training Loss:  0.9595295190811157
Epoch:  7 / 20
Training Loss:  0.8427926898002625
Epoch:  8 / 20
Training Loss:  1.1826220750808716
Epoch:  9 / 20
Training Loss:  0.7530908584594727
Epoch:  10 / 20
Training Loss:  0.7856614589691162
Epoch:  11 / 20
Training Loss:  0.958611786365509
Epoch:  12 / 20
Training Loss:  0.7038587331771851
Epoch:  13 / 20
Training Loss:  0.7313500642776489
Epoch:  14 / 20
Training Loss:  0.6485721468925476
Epoch:  15 / 20
Training Loss:  0.6218316555023193
Epoch:  16 / 20
Training Loss:  0.5377553701400757
Epoch:  17 / 20
Training Loss:  0.7473512887954712
Epoch:  18 / 20
Training Loss:  0.815787672996521
Epoch:  19 / 20
Training Loss:  0.6104170680046

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train accuracy,▁▂▃▄▅▅▆▆▆▇▆▇▇▇▇▇████
train loss,█▅▅▆▄▄▃▅▂▃▄▂▂▂▂▁▂▃▁▂
valid accuracy,▁▃▄▅▆▆▆▇▇▇▇▇▇███████

0,1
epoch,20.0
train accuracy,34.36719
train loss,0.64591
valid accuracy,32.15332


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: kxuciuuc with config:
[34m[1mwandb[0m: 	bi_dir: True
[34m[1mwandb[0m: 	cell_type: gru
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_size: 256
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	num_epochs: 10
[34m[1mwandb[0m: 	num_layers: 4


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01693333333338766, max=1.0)…

cell_gru_nl_4_hs_64_e_10_bd_True_dr_0.2_ems_256
Epoch:  1 / 10
Training Loss:  1.584450602531433
Epoch:  2 / 10
Training Loss:  1.2413936853408813
Epoch:  3 / 10
Training Loss:  1.099813461303711
Epoch:  4 / 10
Training Loss:  1.0645358562469482
Epoch:  5 / 10
Training Loss:  0.8421748280525208
Epoch:  6 / 10
Training Loss:  1.0832549333572388
Epoch:  7 / 10
Training Loss:  0.9699397683143616
Epoch:  8 / 10
Training Loss:  0.9637435078620911
Epoch:  9 / 10
Training Loss:  1.1120182275772095
Epoch:  10 / 10
Training Loss:  0.6645117998123169


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train accuracy,▁▃▄▅▆▇▇███
train loss,█▅▄▄▂▄▃▃▄▁
valid accuracy,▁▃▅▆▇▇▇███

0,1
epoch,10.0
train accuracy,27.4375
train loss,0.66451
valid accuracy,26.9043


[34m[1mwandb[0m: Agent Starting Run: 3nuonyvs with config:
[34m[1mwandb[0m: 	bi_dir: True
[34m[1mwandb[0m: 	cell_type: rnn
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	embed_size: 256
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	num_epochs: 15
[34m[1mwandb[0m: 	num_layers: 3


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016666666666666666, max=1.0…

cell_rnn_nl_3_hs_128_e_15_bd_True_dr_0.5_ems_256
Epoch:  1 / 15
Training Loss:  2.453934669494629
Epoch:  2 / 15
Training Loss:  2.074558734893799
Epoch:  3 / 15
Training Loss:  2.0612730979919434
Epoch:  4 / 15
Training Loss:  1.8426021337509155
Epoch:  5 / 15
Training Loss:  1.8984858989715576
Epoch:  6 / 15
Training Loss:  1.844204068183899
Epoch:  7 / 15
Training Loss:  1.583664894104004
Epoch:  8 / 15
Training Loss:  1.8430020809173584
Epoch:  9 / 15
Training Loss:  1.6449400186538696
Epoch:  10 / 15
Training Loss:  1.8254330158233643
Epoch:  11 / 15
Training Loss:  1.640116572380066
Epoch:  12 / 15
Training Loss:  1.7403066158294678
Epoch:  13 / 15
Training Loss:  1.6900806427001953
Epoch:  14 / 15
Training Loss:  1.6680458784103394
Epoch:  15 / 15
Training Loss:  1.5814579725265503


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
train accuracy,▁▁▁▁▁▁▂▅▃▅▇▇▅█▅
train loss,█▅▅▃▄▃▁▃▂▃▁▂▂▂▁
valid accuracy,▁▁▁▁▁▁▂▆▃▆▇▇▆█▅

0,1
epoch,15.0
train accuracy,1.32812
train loss,1.58146
valid accuracy,2.29492


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: l329q67f with config:
[34m[1mwandb[0m: 	bi_dir: True
[34m[1mwandb[0m: 	cell_type: gru
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_size: 64
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	num_epochs: 20
[34m[1mwandb[0m: 	num_layers: 4


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016666666666666666, max=1.0…

cell_gru_nl_4_hs_256_e_20_bd_True_dr_0.2_ems_64
Epoch:  1 / 20
Training Loss:  1.0195107460021973
Epoch:  2 / 20
Training Loss:  0.7573160529136658
Epoch:  3 / 20
Training Loss:  0.6126053929328918
Epoch:  4 / 20
Training Loss:  0.6353828310966492
Epoch:  5 / 20
Training Loss:  0.642772376537323
Epoch:  6 / 20
Training Loss:  0.3763832151889801
Epoch:  7 / 20
Training Loss:  0.6234275698661804
Epoch:  8 / 20
Training Loss:  0.6609474420547485
Epoch:  9 / 20
Training Loss:  0.5328246355056763
Epoch:  10 / 20
Training Loss:  0.6689972877502441
Epoch:  11 / 20
Training Loss:  0.5389975905418396
Epoch:  12 / 20
Training Loss:  0.43946561217308044


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▂▂▃▄▅▅▆▇▇█
train accuracy,▁▃▄▅▆▆▇▇▇▇█
train loss,█▅▄▄▄▁▄▄▃▄▃
valid accuracy,▁▅▆▆▇██▇█▇█

0,1
epoch,11.0
train accuracy,54.48633
train loss,0.539
valid accuracy,35.96191


[34m[1mwandb[0m: Agent Starting Run: k543uo6s with config:
[34m[1mwandb[0m: 	bi_dir: False
[34m[1mwandb[0m: 	cell_type: gru
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_size: 128
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	num_epochs: 15
[34m[1mwandb[0m: 	num_layers: 4


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016666666666666666, max=1.0…

cell_gru_nl_4_hs_64_e_15_bd_False_dr_0.2_ems_128
Epoch:  1 / 15
Training Loss:  2.5902512073516846


VBox(children=(Label(value='0.000 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.0, max…

[34m[1mwandb[0m: Agent Starting Run: 4pgd5da3 with config:
[34m[1mwandb[0m: 	bi_dir: True
[34m[1mwandb[0m: 	cell_type: rnn
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embed_size: 128
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	num_epochs: 10
[34m[1mwandb[0m: 	num_layers: 3


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016916666666899498, max=1.0…

cell_rnn_nl_3_hs_128_e_10_bd_True_dr_0.3_ems_128
Epoch:  1 / 10
Training Loss:  2.000258445739746
Epoch:  2 / 10
Training Loss:  1.6149288415908813


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁
train accuracy,▁
train loss,▁
valid accuracy,▁

0,1
epoch,1.0
train accuracy,0.0
train loss,2.00026
valid accuracy,0.0


[34m[1mwandb[0m: Ctrl + C detected. Stopping sweep.
