In [16]:
import torch
from torch import nn
import pandas as pd
import torch.optim as optim
import torch.nn.functional as F
from torch.nn.utils.rnn import pad_sequence
import copy
from torch.utils.data import Dataset, DataLoader
import gc
import random
import wandb

In [17]:
wandb.login(key="62cfafb7157dfba7fdd6132ac9d757ccd913aaaf")

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [18]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # Checking if CUDA is available, else use CPU
print(device)  # Printing the device being used (CUDA or CPU)
END_TOKEN = '>'  # Defining the end token for sequences
START_TOKEN = '<'  # Defining the start token for sequences
PAD_TOKEN = '_'  # Defining the padding token for sequences
TEACHER_FORCING_RATIO = 0.5  # Ratio of teacher forcing during training

# Paths to the train, test, and validation CSV files
train_csv = "/kaggle/input/aksh11/aksharantar_sampled/tel/tel_train.csv"
test_csv = "/kaggle/input/aksh11/aksharantar_sampled/tel/tel_test.csv"
val_csv = "/kaggle/input/aksh11/aksharantar_sampled/tel/tel_valid.csv"

# Reading the train, test, and validation CSV files into pandas dataframes
train_df = pd.read_csv(train_csv, header=None)
test_df = pd.read_csv(test_csv, header=None)
val_df = pd.read_csv(val_csv, header=None)

# Extracting source and target sequences from train, test, and validation dataframes
train_source, train_target = train_df[0].to_numpy(), train_df[1].to_numpy()
val_source, val_target = val_df[0].to_numpy(), val_df[1].to_numpy()

cuda


In [19]:
# Function to add padding to source sequences
def add_padding(source_data, MAX_LENGTH):
    """
    Add padding to source sequences and truncate if necessary.
    
    Args:
    - source_data: List of source sequences
    - MAX_LENGTH: Maximum length of source sequences
    
    Returns:
    - padded_source_strings: List of padded source sequences
    """
    padded_source_strings = []
    for i in range(len(source_data)):
        source_str = START_TOKEN + source_data[i] + END_TOKEN  # Add start and end tokens
        source_str = source_str[:MAX_LENGTH]  # Truncate if longer than MAX_LENGTH
        source_str += PAD_TOKEN * (MAX_LENGTH - len(source_str))  # Pad with PAD_TOKEN

        padded_source_strings.append(source_str)
        
    return padded_source_strings


# Function to convert source strings to sequences of indices
def generate_string_to_sequence(source_data, source_char_index_dict):
    """
    Convert source strings to sequences of indices using char_index_dict.
    
    Args:
    - source_data: List of padded source strings
    - source_char_index_dict: Dictionary mapping characters to their indices
    
    Returns:
    - source_sequences: Padded sequence of character indices
    """
    source_sequences = []
    for i in range(len(source_data)):
        source_sequences.append(get_chars(source_data[i], source_char_index_dict))
    source_sequences = pad_sequence(source_sequences, batch_first=True, padding_value=2)
    return source_sequences


# Function to convert characters to their corresponding indices
def get_chars(string, char_index_dict):
    """
    Convert characters in a string to their corresponding indices using char_index_dict.
    
    Args:
    - string: Input string
    - char_index_dict: Dictionary mapping characters to their indices
    
    Returns:
    - chars_indexes: List of character indices
    """
    chars_indexes = []
    for char in string:
        chars_indexes.append(char_index_dict[char])
    return torch.tensor(chars_indexes, device=device)


# Preprocess the data, including adding padding, generating sequences, and updating dictionaries
def preprocess_data(source_data, target_data):
    """
    Preprocess source and target data.
    
    Args:
    - source_data: List of source strings
    - target_data: List of target strings
    
    Returns:
    - data: Preprocessed data dictionary
    """
    data = {
        "source_chars": [START_TOKEN, END_TOKEN, PAD_TOKEN],
        "target_chars": [START_TOKEN, END_TOKEN, PAD_TOKEN],
        "source_char_index": {START_TOKEN: 0, END_TOKEN: 1, PAD_TOKEN: 2},
        "source_index_char": {0: START_TOKEN, 1: END_TOKEN, 2: PAD_TOKEN},
        "target_char_index": {START_TOKEN: 0, END_TOKEN: 1, PAD_TOKEN: 2},
        "target_index_char": {0: START_TOKEN, 1: END_TOKEN, 2: PAD_TOKEN},
        "source_len": 3,
        "target_len": 3,
        "source_data": source_data,
        "target_data": target_data,
        "source_data_seq": [],
        "target_data_seq": []
    }
    
    # Calculate the maximum length of input and output sequences
    data["INPUT_MAX_LENGTH"] = max(len(string) for string in source_data) + 2
    data["OUTPUT_MAX_LENGTH"] = max(len(string) for string in target_data) + 2

    # Pad the source and target sequences and update character dictionaries
    padded_source_strings = add_padding(source_data, data["INPUT_MAX_LENGTH"])
    padded_target_strings = add_padding(target_data, data["OUTPUT_MAX_LENGTH"])
    
    for i in range(len(padded_source_strings)):
        for char in padded_source_strings[i]:
            if data["source_char_index"].get(char) is None:
                data["source_chars"].append(char)
                idx = len(data["source_chars"]) - 1
                data["source_char_index"][char] = idx
                data["source_index_char"][idx] = char
        for char in padded_target_strings[i]:
            if data["target_char_index"].get(char) is None:
                data["target_chars"].append(char)
                idx = len(data["target_chars"]) - 1
                data["target_char_index"][char] = idx
                data["target_index_char"][idx] = char

    # Generate sequences of indexes for source and target data
    data['source_data_seq'] = generate_string_to_sequence(padded_source_strings, data['source_char_index'])
    data['target_data_seq'] = generate_string_to_sequence(padded_target_strings, data['target_char_index'])
    
    # Update lengths of source and target character lists
    data["source_len"] = len(data["source_chars"])
    data["target_len"] = len(data["target_chars"])
    
    return data


In [20]:
def get_cell_type(cell_type):
    # Function to return the appropriate RNN cell based on the specified type
    if(cell_type == "RNN"):
        return nn.RNN
    elif(cell_type == "LSTM"):
        return nn.LSTM
    elif(cell_type == "GRU"):
        return nn.GRU
    else:
        print("Specify correct cell type")

class Attention(nn.Module):
    def __init__(self, hidden_size):
        # Initialize the attention mechanism module
        super(Attention, self).__init__()
        self.Wa = nn.Linear(hidden_size, hidden_size)
        self.Ua = nn.Linear(hidden_size, hidden_size)
        self.Va = nn.Linear(hidden_size, 1)

    def forward(self, query, keys):
        # Forward pass of the attention mechanism
        scores = self.Va(torch.tanh(self.Wa(query) + self.Ua(keys)))
        scores = scores.squeeze().unsqueeze(1)
        weights = F.softmax(scores, dim=0)
        weights = weights.permute(2,1,0)
        keys = keys.permute(1,0,2)
        context = torch.bmm(weights, keys)
        return context, weights

class Encoder(nn.Module):
    def __init__(self, h_params, data, device ):
        # Initialize the Encoder module
        super(Encoder, self).__init__()
        # Embedding layer for input characters
        self.embedding = nn.Embedding(data["source_len"], h_params["char_embd_dim"])
        # RNN cell for encoding
        self.cell = get_cell_type(h_params["cell_type"])(h_params["char_embd_dim"], h_params["hidden_layer_neurons"],num_layers=h_params["number_of_layers"], batch_first=True)
        self.device=device
        self.h_params = h_params
        self.data = data
        
    def forward(self, input , encoder_curr_state):
        # Forward pass of the Encoder module
        input_length = self.data["INPUT_MAX_LENGTH"]
        batch_size = self.h_params["batch_size"]
        hidden_neurons = self.h_params["hidden_layer_neurons"]
        layers = self.h_params["number_of_layers"]
        encoder_states  = torch.zeros(input_length, layers, batch_size, hidden_neurons, device=self.device )
        for i in range(input_length):
            current_input = input[:, i].view(batch_size,1)
            _, encoder_curr_state = self.forward_step(current_input, encoder_curr_state)
            if self.h_params["cell_type"] == "LSTM":
                encoder_states[i] = encoder_curr_state[1]
            else:
                encoder_states[i] = encoder_curr_state
        return encoder_states, encoder_curr_state
    
    def forward_step(self, current_input, prev_state):
        # Perform forward pass for one time step
        embd_input = self.embedding(current_input)
        output, prev_state = self.cell(embd_input, prev_state)
        return output, prev_state
        
    def getInitialState(self):
        # Initialize initial hidden state for encoder
        return torch.zeros(self.h_params["number_of_layers"],self.h_params["batch_size"],self.h_params["hidden_layer_neurons"], device=self.device)

class Decoder(nn.Module):
    def __init__(self, h_params, data,device):
        # Initialize the Decoder module
        super(Decoder, self).__init__()
        # Attention mechanism
        self.attention = Attention(h_params["hidden_layer_neurons"]).to(device)
        # Embedding layer for target characters
        self.embedding = nn.Embedding(data["target_len"], h_params["char_embd_dim"])
        # RNN cell for decoding
        self.cell = get_cell_type(h_params["cell_type"])(h_params["hidden_layer_neurons"] +h_params["char_embd_dim"], h_params["hidden_layer_neurons"],num_layers=h_params["number_of_layers"], batch_first=True)
        # Fully connected layer for output
        self.fc = nn.Linear(h_params["hidden_layer_neurons"], data["target_len"])
        # Softmax activation for output probabilities
        self.softmax = nn.LogSoftmax(dim=2)
        self.h_params = h_params
        self.data = data
        self.device = device

    def forward(self, decoder_current_state, encoder_final_layers, target_batch, loss_fn, teacher_forcing_enabled=True):
        # Forward pass of the Decoder module
        batch_size = self.h_params["batch_size"]
        decoder_current_input = torch.full((batch_size,1),self.data["target_char_index"][START_TOKEN], device=self.device)
        embd_input = self.embedding(decoder_current_input)
        curr_embd = F.relu(embd_input)
        decoder_actual_output = []
        attentions = []
        loss = 0
        
        use_teacher_forcing = False
        if(teacher_forcing_enabled):
            use_teacher_forcing = True if random.random() < TEACHER_FORCING_RATIO else False
        for i in range(self.data["OUTPUT_MAX_LENGTH"]):
            # Perform one step of decoding
            decoder_output, decoder_current_state, attn_weights = self.forward_step(decoder_current_input, decoder_current_state, encoder_final_layers)
            attentions.append(attn_weights)
            topv, topi = decoder_output.topk(1)
            decoder_current_input = topi.squeeze().detach()
            decoder_actual_output.append(decoder_current_input)

            if(target_batch==None):
                decoder_current_input = decoder_current_input.view(self.h_params["batch_size"], 1)
            else:
                curr_target_chars = target_batch[:, i]
                if(i<self.data["OUTPUT_MAX_LENGTH"]-1):
                    if use_teacher_forcing:
                        decoder_current_input = target_batch[:, i+1].view(self.h_params["batch_size"], 1)
                    else:
                        decoder_current_input = decoder_current_input.view(self.h_params["batch_size"], 1)
                decoder_output = decoder_output[:, -1, :]
                loss+=(loss_fn(decoder_output, curr_target_chars))

        decoder_actual_output = torch.cat(decoder_actual_output,dim=0).view(self.data["OUTPUT_MAX_LENGTH"], self.h_params["batch_size"]).transpose(0,1)

        correct = (decoder_actual_output == target_batch).all(dim=1).sum().item()
        return decoder_actual_output, attentions, loss, correct
    
    def forward_step(self, current_input, prev_state, encoder_final_layers):
        # Perform one step of decoding
        embd_input = self.embedding(current_input)
        if self.h_params["cell_type"] == "LSTM":
            context , attn_weights = self.attention(prev_state[1][-1,:,:], encoder_final_layers)
        else:
            context , attn_weights = self.attention(prev_state[-1,:,:], encoder_final_layers)
        curr_embd = F.relu(embd_input)
        input_gru = torch.cat((curr_embd, context), dim=2)
        output, prev_state = self.cell(input_gru, prev_state)
        output = self.softmax(self.fc(output))
        return output, prev_state, attn_weights


In [21]:
class MyDataset(Dataset):
    def __init__(self, data):
        self.source_data_seq = data[0]
        self.target_data_seq = data[1]
    
    def __len__(self):
        return len(self.source_data_seq)
    
    def __getitem__(self, idx):
        source_data = self.source_data_seq[idx]
        target_data = self.target_data_seq[idx]
        return source_data, target_data


In [22]:
def evaluate(encoder, decoder, data, dataloader, device, h_params, loss_fn, use_teacher_forcing = False):
    # Function to evaluate the performance of the model on a dataset
    correct_predictions = 0
    total_loss = 0
    total_predictions = len(dataloader.dataset)
    number_of_batches = len(dataloader)
    encoder.eval()
    decoder.eval()
    
    with torch.no_grad():
        for batch_num, (source_batch, target_batch) in enumerate(dataloader):

            encoder_initial_state = encoder.getInitialState()
            if h_params["cell_type"] == "LSTM":
                encoder_initial_state = (encoder_initial_state, encoder.getInitialState())
            encoder_states, encoder_final_state = encoder(source_batch,encoder_initial_state)

            decoder_current_state = encoder_final_state
            encoder_final_layer_states = encoder_states[:, -1, :, :]

            loss = 0
            correct = 0

            decoder_output, attentions, loss, correct = decoder(decoder_current_state, encoder_final_layer_states, target_batch, loss_fn, use_teacher_forcing)

            correct_predictions+=correct
            total_loss +=loss

        accuracy = correct_predictions / total_predictions
        total_loss /= number_of_batches

        return accuracy, total_loss


In [23]:
def make_strings(data, source, target, output):
    # Function to convert indices to strings for source, target, and output sequences
    source_string = ""
    target_string = ""
    output_string = ""
    for i in source:
        source_string+=(data['source_index_char'][i.item()])
    for i in target:
        target_string+=(data['target_index_char'][i.item()])
    for i in output:
        output_string+=(data['target_index_char'][i.item()])
    return source_string, target_string, output_string


def train_loop(encoder, decoder,h_params, data, data_loader, device, val_dataloader, use_teacher_forcing=True):
    # Function to train the encoder-decoder model
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=h_params["learning_rate"])
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=h_params["learning_rate"])
    
    loss_fn = nn.NLLLoss()
    
    total_predictions = len(data_loader.dataset)
    total_batches = len(data_loader)
    
    for ep in range(h_params["epochs"]):
        total_correct = 0
        total_loss = 0
        encoder.train()
        decoder.train()
        for batch_num, (source_batch, target_batch) in enumerate(data_loader):
            encoder_initial_state = encoder.getInitialState()
            
            if h_params["cell_type"] == "LSTM":
                encoder_initial_state = (encoder_initial_state, encoder.getInitialState())
            encoder_states, encoder_final_state = encoder(source_batch,encoder_initial_state)
            
            decoder_current_state = encoder_final_state
            encoder_final_layer_states = encoder_states[:, -1, :, :]
            
            
            loss = 0
            correct = 0
            
            decoder_output, attentions, loss, correct = decoder(decoder_current_state, encoder_final_layer_states, target_batch, loss_fn, use_teacher_forcing)
            total_correct +=correct
            total_loss += loss.item()/data["OUTPUT_MAX_LENGTH"]
            
            encoder_optimizer.zero_grad()
            decoder_optimizer.zero_grad()
            loss.backward()
            encoder_optimizer.step()
            decoder_optimizer.step()
            
            
        train_acc = total_correct/total_predictions
        train_loss = total_loss/total_batches
        val_acc, val_loss = evaluate(encoder, decoder, data, val_dataloader,device, h_params, loss_fn, False)
        print("ep: ", ep, " train acc:", train_acc, " train loss:", train_loss, " val acc:", val_acc, " val loss:", val_loss.item()/data["OUTPUT_MAX_LENGTH"])
        wandb.log({"train_accuracy":train_acc, "train_loss":train_loss, "val_accuracy":val_acc, "val_loss":val_loss, "epoch":ep})


In [24]:
h_params={
    "char_embd_dim" : 64,  # Dimension of character embeddings
    "hidden_layer_neurons": 512,  # Number of neurons in hidden layers
    "batch_size": 32,  # Batch size
    "number_of_layers": 5,  # Number of layers in the encoder and decoder
    "learning_rate": 0.0001,  # Learning rate for optimization
    "epochs": 25,  # Number of epochs for training
    "cell_type": "RNN",  # Type of RNN cell: RNN, LSTM, GRU
    "dropout": 0.3,  # Dropout probability
    "optimizer": "adam"  # Optimization algorithm: adam, nadam
}

def prepare_dataloaders(train_source, train_target, val_source, val_target, h_params):
    # Preparing data loaders for training and validation
    data = preprocess_data(copy.copy(train_source), copy.copy(train_target))
    
    # Training data
    training_data = [data["source_data_seq"], data['target_data_seq']]
    train_dataset = MyDataset(training_data)
    train_dataloader = DataLoader(train_dataset, batch_size=h_params["batch_size"], shuffle=True)

    # Validation data
    val_padded_source_strings = add_padding(val_source, data["INPUT_MAX_LENGTH"])
    val_padded_target_strings = add_padding(val_target, data["OUTPUT_MAX_LENGTH"])
    val_source_sequences = generate_string_to_sequence(val_padded_source_strings, data['source_char_index'])
    val_target_sequences = generate_string_to_sequence(val_padded_target_strings, data['target_char_index'])
    validation_data = [val_source_sequences, val_target_sequences]
    val_dataset = MyDataset(validation_data)
    val_dataloader = DataLoader(val_dataset, batch_size=h_params["batch_size"], shuffle=True)
    
    return train_dataloader, val_dataloader, data


In [25]:
def train(h_params, data, device, data_loader, val_dataloader, use_teacher_forcing=True):
    encoder = Encoder(h_params, data, device).to(device)
    decoder = Decoder(h_params, data, device).to(device)
    train_loop(encoder, decoder,h_params, data, data_loader,device, val_dataloader, use_teacher_forcing)
    

In [26]:
config = h_params
run = wandb.init(project="DL Assignment 3 With Attention", name=f"{config['cell_type']}_{config['optimizer']}_ep_{config['epochs']}_lr_{config['learning_rate']}_embd_{config['char_embd_dim']}_hid_lyr_neur_{config['hidden_layer_neurons']}_bs_{config['batch_size']}_enc_layers_{config['number_of_layers']}_dec_layers_{config['number_of_layers']}_dropout_{config['dropout']}", config=config)
train_dataloader, val_dataloader, data = prepare_dataloaders(train_source, train_target, val_source, val_target, h_params)
train(h_params, data, device, train_dataloader, val_dataloader, True)

In [27]:
#Run this cell to run a sweep with appropriate parameters
sweep_params = {
    'method' : 'bayes',
    'name'   : 'DL Assignment 3 With Attention',
    'metric' : {
        'goal' : 'maximize',
        'name' : 'val_accuracy',
    },
    'parameters' : {
        'epochs':{'values' : [15, 20]},
        'learning_rate':{'values' : [0.001, 0.0001]},
        'batch_size':{'values':[32,64, 128]},
        'char_embd_dim':{'values' : [64, 128, 256] } ,
        'number_of_layers':{'values' : [1,2,3,4]},
        'optimizer':{'values':['nadam','adam']},
        'cell_type':{'values' : ["RNN","LSTM", "GRU"]},
        'hidden_layer_neurons':{'values': [ 128, 256, 512]},
        'dropout':{'values': [0,0.2, 0.3]}
    }
}

sweep_id = wandb.sweep(sweep=sweep_params, project="DL Assignment 3 With Attention")
def main():
    wandb.init(project="DL Assignment 3" )
    config = wandb.config
    with wandb.init(project="DL Assignment 3", name=f"{config['cell_type']}_{config['optimizer']}_ep_{config['epochs']}_lr_{config['learning_rate']}_embd_{config['char_embd_dim']}_hid_lyr_neur_{config['hidden_layer_neurons']}_bs_{config['batch_size']}_enc_layers_{config['number_of_layers']}_dec_layers_{config['number_of_layers']}_dropout_{config['dropout']}", config=config):
        train_dataloader, val_dataloader, data = prepare_dataloaders(train_source, train_target, val_source, val_target, config)
        train(config, data, device, train_dataloader, val_dataloader, True)

Create sweep with ID: qfu8kgax
Sweep URL: https://wandb.ai/jaswanth431/DL%20Assignment%203%20With%20Attention/sweeps/qfu8kgax


In [None]:
wandb.agent("f4esgkqv", function=main, count=100)

[34m[1mwandb[0m: Agent Starting Run: lsh7g72b with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	char_embd_dim: 128
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	hidden_layer_neurons: 512
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_layers: 2
[34m[1mwandb[0m: 	optimizer: adam


VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))
<IPython.core.display.HTML object>
<IPython.core.display.HTML object>






VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

ep:  0  train acc: 0.060703125  train loss: 1.3598916569611277  val acc: 0.0  val loss: 1.2673139986784563
ep:  1  train acc: 0.385078125  train loss: 0.7451431657859815  val acc: 0.000244140625  val loss: 0.9511200448741084
ep:  2  train acc: 0.480078125  train loss: 0.4399974388406484  val acc: 0.056884765625  val loss: 0.52749604764192
ep:  3  train acc: 0.5425390625  train loss: 0.28035949707152724  val acc: 0.280029296875  val loss: 0.3564677031143852
ep:  4  train acc: 0.64916015625  train loss: 0.19978529172676168  val acc: 0.335205078125  val loss: 0.33012458552484925
ep:  5  train acc: 0.6590625  train loss: 0.1810680220670411  val acc: 0.35888671875  val loss: 0.30621188619862433
ep:  6  train acc: 0.6941015625  train loss: 0.15730468502440298  val acc: 0.379150390625  val loss: 0.30107657805733057
ep:  7  train acc: 0.69990234375  train loss: 0.14846995970130494  val acc: 0.422607421875  val loss: 0.2777045706044073
ep:  8  train acc: 0.7290234375  train loss: 0.129427691588

VBox(children=(Label(value='0.001 MB of 0.025 MB uploaded\r'), FloatProgress(value=0.05228758169934641, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_accuracy,▁▄▅▆▇▇▇▇▇▇▇▇████████
train_loss,█▅▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▂▅▆▆▆▇▇▇▇▇▇▇▇█▇███
val_loss,█▆▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,19.0
train_accuracy,0.80529
train_loss,0.07431
val_accuracy,0.51074
val_loss,5.93209


[34m[1mwandb[0m: Agent Starting Run: b25zweri with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	char_embd_dim: 256
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	hidden_layer_neurons: 512
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_layers: 3
[34m[1mwandb[0m: 	optimizer: adam




VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

ep:  0  train acc: 0.0  train loss: 1.6767475973004886  val acc: 0.0  val loss: 1.2725757930589758
ep:  1  train acc: 0.001171875  train loss: 1.3597788157152113  val acc: 0.0  val loss: 1.2238353231678838
ep:  2  train acc: 0.1939453125  train loss: 0.9023545388683039  val acc: 0.0  val loss: 1.1987614838973335
ep:  3  train acc: 0.45251953125  train loss: 0.6803721010037093  val acc: 0.000244140625  val loss: 0.9439834097157354
ep:  4  train acc: 0.47955078125  train loss: 0.5796446153310983  val acc: 0.004150390625  val loss: 0.804785272349482
ep:  5  train acc: 0.47662109375  train loss: 0.47940716573239667  val acc: 0.067626953125  val loss: 0.5650688668955928
ep:  6  train acc: 0.5201171875  train loss: 0.33609296744203443  val acc: 0.139892578125  val loss: 0.515300875124724
ep:  7  train acc: 0.5991796875  train loss: 0.24387488683685687  val acc: 0.280029296875  val loss: 0.37906895513119904
ep:  8  train acc: 0.638828125  train loss: 0.21992339828456514  val acc: 0.2578125  v

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_accuracy,▁▁▃▅▅▅▆▆▇▇▇▇█▇▇█████
train_loss,█▇▅▄▃▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▂▃▅▅▆▅▇▇▇██████
val_loss,██▇▆▅▃▃▂▂▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,19.0
train_accuracy,0.77203
train_loss,0.09892
val_accuracy,0.46094
val_loss,5.91228


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: xrjxt3ya with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	char_embd_dim: 64
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	hidden_layer_neurons: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	number_of_layers: 3
[34m[1mwandb[0m: 	optimizer: adam




VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

ep:  0  train acc: 0.00029296875  train loss: 1.6407427055939399  val acc: 0.0  val loss: 1.3834874526314114
ep:  1  train acc: 0.28275390625  train loss: 0.9383157526669297  val acc: 0.0  val loss: 1.0804095060929009
ep:  2  train acc: 0.47716796875  train loss: 0.7108431286364792  val acc: 0.00048828125  val loss: 1.0713208240011465
ep:  3  train acc: 0.4951171875  train loss: 0.6165715354842984  val acc: 0.002685546875  val loss: 0.9365818189538043
ep:  4  train acc: 0.495390625  train loss: 0.5328888896982303  val acc: 0.0263671875  val loss: 0.7081696054209834
ep:  5  train acc: 0.488359375  train loss: 0.44866319839154284  val acc: 0.013427734375  val loss: 0.8386660866115404
ep:  6  train acc: 0.5640234375  train loss: 0.34817811860500475  val acc: 0.18310546875  val loss: 0.4393294790516729
ep:  7  train acc: 0.5698046875  train loss: 0.27100872862760145  val acc: 0.16357421875  val loss: 0.47560513537863025
ep:  8  train acc: 0.58654296875  train loss: 0.2657612212439593  val 

VBox(children=(Label(value='0.001 MB of 0.023 MB uploaded\r'), FloatProgress(value=0.05730374296124544, max=1.…

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_accuracy,▁▄▅▆▆▆▆▆▇▇▇▇▇███████
train_loss,█▅▄▃▃▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▄▄▆▇▇▇▆▇▇█████
val_loss,█▆▆▅▄▅▂▂▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,19.0
train_accuracy,0.74387
train_loss,0.11572
val_accuracy,0.43872
val_loss,6.53249


[34m[1mwandb[0m: Agent Starting Run: hygxi89w with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	char_embd_dim: 256
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	hidden_layer_neurons: 512
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_layers: 2
[34m[1mwandb[0m: 	optimizer: adam




VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

ep:  0  train acc: 0.0283203125  train loss: 1.4718731344782803  val acc: 0.0  val loss: 1.3344588901685632
ep:  1  train acc: 0.28478515625  train loss: 0.885497260430585  val acc: 0.0  val loss: 1.3487915370775305
ep:  2  train acc: 0.4500390625  train loss: 0.6453109680537307  val acc: 0.004150390625  val loss: 0.7819676606551461
ep:  3  train acc: 0.49642578125  train loss: 0.3929376092868978  val acc: 0.162841796875  val loss: 0.4447840400364088
ep:  4  train acc: 0.5783203125  train loss: 0.253743595585794  val acc: 0.271728515625  val loss: 0.3657737814861795
ep:  5  train acc: 0.6508203125  train loss: 0.20211279487237302  val acc: 0.346923828125  val loss: 0.3197191279867421
ep:  6  train acc: 0.67  train loss: 0.17789964214169784  val acc: 0.38037109375  val loss: 0.30554383733998175
ep:  7  train acc: 0.6691796875  train loss: 0.1736332487527524  val acc: 0.390869140625  val loss: 0.28624868392944336
ep:  8  train acc: 0.7132421875  train loss: 0.14435515786278189  val acc: 

VBox(children=(Label(value='0.026 MB of 0.026 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_accuracy,▁▃▅▅▆▇▇▇▇▇▇▇▇███████
train_loss,█▅▄▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▃▅▆▆▇▆▇▇▇▇▇█▇████
val_loss,██▄▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,19.0
train_accuracy,0.7949
train_loss,0.08358
val_accuracy,0.48926
val_loss,5.49139


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 4te04t19 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	char_embd_dim: 128
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	hidden_layer_neurons: 512
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	number_of_layers: 2
[34m[1mwandb[0m: 	optimizer: adam




VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

ep:  0  train acc: 0.0887890625  train loss: 1.2891621074339605  val acc: 0.0  val loss: 1.202598820561948
ep:  1  train acc: 0.42591796875  train loss: 0.6444516665286016  val acc: 0.01025390625  val loss: 0.6577523687611455
ep:  2  train acc: 0.50552734375  train loss: 0.3508361675926364  val acc: 0.200439453125  val loss: 0.40340983349344006
ep:  3  train acc: 0.62029296875  train loss: 0.21735633962342235  val acc: 0.224853515625  val loss: 0.39969257686449133
ep:  4  train acc: 0.64998046875  train loss: 0.1894767151834723  val acc: 0.36865234375  val loss: 0.31124197918435803
ep:  5  train acc: 0.68708984375  train loss: 0.16205762501385912  val acc: 0.34423828125  val loss: 0.3246242689049762
ep:  6  train acc: 0.705546875  train loss: 0.1461200351055215  val acc: 0.41650390625  val loss: 0.280131547347359
ep:  7  train acc: 0.731328125  train loss: 0.12995931103018246  val acc: 0.421875  val loss: 0.27200524703316065
ep:  8  train acc: 0.73044921875  train loss: 0.1256616770177