In [None]:
import wandb
!wandb login # Enter your API key here

[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin


In [2]:
%%writefile WordEmbeddings_Translate.py
import torch
import torch.nn as nn

class WordEmbeddings:
    def translateWordToTensor(self, word, vocabulary, language):
        '''
            Parameters:
                word : word on which to create the embeddings
                vocabulary : vocabulary of the dataset
                language : language of the dataset
            Returns :
                trans : embedding of the word
            Function:
                Generates the embeddings
        '''
        tensorList = list()
        if language == "english":
            tensorList.append(
                vocabulary.charToIndexDictForEnglish[vocabulary.startOfSequenceToken]
            )
        else:
            tensorList.append(
                vocabulary.charToIndexDictForBengali[vocabulary.startOfSequenceToken]
            )

        for one_char in word:
            if language == "english":
                tensorList.append(vocabulary.charToIndexDictForEnglish[one_char])
            else:
                tensorList.append(vocabulary.charToIndexDictForBengali[one_char])

        if language == "english":
            tensorList.append(
                vocabulary.charToIndexDictForEnglish[vocabulary.endOfSequenceToken]
            )
        else:
            tensorList.append(
                vocabulary.charToIndexDictForBengali[vocabulary.endOfSequenceToken]
            )

        trans = torch.tensor(tensorList, dtype=torch.int64)
        return trans

Writing WordEmbeddings_Translate.py


In [3]:
%%writefile WordEmbeddings_Create.py
import Utilities_Device_Trainings
import torch
import torch.nn as nn
from WordEmbeddings_Translate import WordEmbeddings as BaseWordEmbeddings

class WordEmbeddings(BaseWordEmbeddings):

    '''class to create the word embeddings'''
    def createWordEmbeddings(self, dataset, vocabulary):
        '''
            Parameters:
                dataset : dataset on which to create the embeddings
                vocabulary : vocabulary of the dataset
            Returns :
                None
            Function:
                Creates embeddings of the words
        '''
        englishDataset = dataset[:, 0]
        bengaliDataset = dataset[:, 1]

        tensorListEnglish = list()
        tensorListBengali = list()

        '''embeddings for source language'''
        language = "english"
        for one_word in englishDataset:
            tensor = self.translateWordToTensor(one_word, vocabulary, language)
            tensor = Utilities_Device_Trainings.setDevice(tensor)
            tensorListEnglish.append(tensor)
        self.englishEmbedding = nn.utils.rnn.pad_sequence(
            tensorListEnglish,
            padding_value=vocabulary.paddingIndex,
            batch_first=True
        )
        self.englishEmbedding = Utilities_Device_Trainings.setDevice(self.englishEmbedding)

        '''embeddings for target language'''
        language = "bengali"
        for one_word in bengaliDataset:
            tensor = self.translateWordToTensor(one_word, vocabulary, language)
            tensor = Utilities_Device_Trainings.setDevice(tensor)
            tensorListBengali.append(tensor)
        self.bengaliEmbedding = nn.utils.rnn.pad_sequence(
            tensorListBengali,
            padding_value=vocabulary.paddingIndex,
            batch_first=True
        )
        self.bengaliEmbedding = Utilities_Device_Trainings.setDevice(self.bengaliEmbedding)

Writing WordEmbeddings_Create.py


In [4]:
%%writefile EncoderArchitecture.py
import Utilities_Layer
import torch.nn as nn

'''class to represent the encoder architecture'''
class EncoderStack(nn.Module):

    '''constructor to intialize the class parameters'''
    def __init__(self, argList):
        '''inherit the constructor of the parent class'''
        super(EncoderStack, self).__init__()
        '''set all the class parameters based on the arguments passed'''
        modelType = argList[0]
        encoderInputSize = argList[1]
        embeddingSize = argList[2]
        neruonsInFC = argList[3]
        layersInEncoder = argList[4]
        dropout = argList[5]
        biDirectional = argList[6]
        attention = argList[7]

        self.neruonsInFC = neruonsInFC
        self.layersInEncoder = layersInEncoder
        if biDirectional == "YES":
            self.biDirect = True
        else:
            self.biDirect = False
        self.attention = attention

        '''select the cell type based on the value passed in argument'''
        model_dict = {"LSTM": nn.LSTM, "GRU": nn.GRU, "RNN": nn.RNN}
        modelObj = model_dict.get(modelType)

        '''do not apply dropout if only one layer is present'''
        if self.layersInEncoder == 1:
            self.dropout = Utilities_Layer.createDropoutLayer(0.0)
            self.model = modelObj(embeddingSize, self.neruonsInFC, self.layersInEncoder, dropout=0.0, bidirectional=self.biDirect)
        else:
            self.dropout = Utilities_Layer.createDropoutLayer(dropout)
            self.model = modelObj(embeddingSize, self.neruonsInFC, self.layersInEncoder, dropout=dropout, bidirectional=self.biDirect)

        '''create ambedding layer'''
        self.embeddingLayer = Utilities_Layer.createEmbeddingLayer(encoderInputSize, embeddingSize)

Writing EncoderArchitecture.py


In [5]:
%%writefile EncoderArchitecture_Forward.py
import Utilities_Tensor
import torch.nn as nn
from EncoderArchitecture import EncoderStack as BaseEncoderStack

'''class to add forward propagation to the encoder architecture'''
class EncoderStack(BaseEncoderStack):

    def forward(self, batchData):
        '''
            Parameters:
                batchData : data sent in batches (as a 2D tensor)
            Returns :
                modelEval : output from the current state of the encoder
                innerLayer : hidden layers representation
                model : the object of the combined architecture with updated parameters
            Function:
                Performs forward propagation in the architecture
        '''

        '''sets embedding layer'''
        embeddedBatch = self.embeddingLayer(batchData)
        embeddedBatch = self.dropout(embeddedBatch)
        model = None

        '''create the gates for LSTM'''
        if isinstance(self.model, nn.LSTM):
            modelEval, (innerLayer, model) = self.model(embeddedBatch)
            '''implement bidirectional architecture'''
            if self.biDirect:
                batchSize = model.size(1)
                model = Utilities_Tensor.resizeTensor(model, self.layersInEncoder, 2, batchSize, -1)
                model = Utilities_Tensor.reverseTensor(model)
                model = Utilities_Tensor.getMean(model)
            else:
                model = model[-1, :, :]
            model = Utilities_Tensor.increaseDimension(model)
        else:
            modelEval, innerLayer = self.model(embeddedBatch)

        '''check and implement bidirectional architecture'''
        if self.biDirect:
            batchSize = innerLayer.size(1)
            innerLayer = Utilities_Tensor.resizeTensor(innerLayer, self.layersInEncoder, 2, batchSize, -1)
            innerLayer = Utilities_Tensor.reverseTensor(innerLayer)
            innerLayer = Utilities_Tensor.getMean(innerLayer)
            '''apply attention'''
            if self.attention == 1:
                modelEval = Utilities_Tensor.addTensor(modelEval[:, :, :self.neruonsInFC], modelEval[:, :, self.neruonsInFC:])
        else:
            innerLayer = innerLayer[-1, :, :]

        innerLayer = Utilities_Tensor.increaseDimension(innerLayer)

        return modelEval, innerLayer, model

Writing EncoderArchitecture_Forward.py


In [6]:
%%writefile DecoderArchitecture.py
import Utilities_Layer
import torch.nn as nn

'''class to represent the decoder architecture'''
class DecoderStack(nn.Module):

    '''constructor to intialize the class parameters'''
    def __init__(self, argList):
        '''inherit the constructor of the parent class'''
        super(DecoderStack, self).__init__()
        '''set all the class parameters based on the arguments passed'''
        modelType = argList[0]
        decoderInputSize = argList[1]
        embeddingSize = argList[2]
        neruonsInFC = argList[3]
        outputWordSize = argList[4]
        layersInDecoder = argList[5]
        dropout = argList[6]
        attention = argList[7]

        self.modelType = modelType
        self.layersInDecoder = layersInDecoder
        self.outputWordSize = outputWordSize
        self.attention = attention
        
        '''select the cell type based on the value passed in argument'''
        modelDict = {"LSTM": nn.LSTM, "GRU": nn.GRU, "RNN": nn.RNN}
        modelObj = modelDict.get(modelType)

        '''apply attention'''
        if self.attention == 0:
            '''do not apply dropout if only one layer is present'''
            if layersInDecoder == 1:
                self.dropout = Utilities_Layer.createDropoutLayer(0.0)
                self.model = modelObj(embeddingSize, neruonsInFC, layersInDecoder, dropout=0.0)
            else:
                self.dropout = Utilities_Layer.createDropoutLayer(dropout)
                self.model = modelObj(embeddingSize, neruonsInFC, layersInDecoder, dropout=dropout)
            self.fullyConnectedLayer = nn.Linear(neruonsInFC, outputWordSize)
        else:
            '''do not apply dropout if only one layer is present'''
            if layersInDecoder == 1:
                self.dropout = Utilities_Layer.createDropoutLayer(0.0)
                self.model = modelObj(embeddingSize + neruonsInFC, neruonsInFC, layersInDecoder, dropout=0.0)
            else:
                self.dropout = Utilities_Layer.createDropoutLayer(dropout)
                self.model = modelObj(embeddingSize + neruonsInFC, neruonsInFC, layersInDecoder, dropout=dropout)
            self.fullyConnectedLayer = nn.Linear(neruonsInFC * 2, outputWordSize)
        
        '''create ambedding and linear layer'''
        self.embeddingLayer = Utilities_Layer.createEmbeddingLayer(decoderInputSize, embeddingSize)
        self.neuronsInAttentionFC = Utilities_Layer.createLinearLayer(neruonsInFC, neruonsInFC, False)

Writing DecoderArchitecture.py


In [7]:
%%writefile DecoderArchitecture_Forward.py
import Utilities_Tensor
import Utilities_Device_Trainings
import torch.nn as nn
from DecoderArchitecture import DecoderStack as BaseDecoderStack

'''class to add forward propagation to the decoder architecture'''
class DecoderStack(BaseDecoderStack):

    def forward(self, batchData, encoderOutput, innerLayer, model):
        '''
            Parameters:
                batchData : data sent in batches (as a 2D tensor)
                encoderOutput : output from the encoder (on which the decoder will work)
                innerLayer : hidden layers representation
                model : the object of the combined architecture on which the decoder is working
            Returns :
                predictions : predicted outputs from the decoder
                innerLayer : hidden layers representation
                model : the object of the combined architecture with updated parameters
                finalAttentionWeights : updated attention weights
            Function:
                Performs forward propagation in the architecture
        '''

        '''sets batch size and embedding layer'''
        batchData = Utilities_Tensor.increaseDimension(batchData)
        embeddedBatch = self.embeddingLayer(batchData)
        embeddingLayer = self.dropout(embeddedBatch)

        '''declare the attention matrix'''
        finalAttentionWeights = None

        '''appply attention and calculate the weights'''
        if self.attention == 1:
            finalOutputFromEncoderBlock = self.neuronsInAttentionFC(encoderOutput)
            finalHiddenLayer = innerLayer[-1:]
            attentionValues = Utilities_Tensor.mutiplyTensors(
                Utilities_Tensor.reorderDimensions(finalOutputFromEncoderBlock, 1, 0, 2),
                Utilities_Tensor.reorderDimensions(finalHiddenLayer, 1, 2, 0)
            )
            attentionValues = Utilities_Tensor.reorderDimensions(attentionValues, 2, 0, 1)
            finalAttentionWeights = Utilities_Device_Trainings.setOutputFunction(attentionValues)
            attentionIntoDecoder = Utilities_Tensor.mutiplyTensors(
                Utilities_Tensor.reorderDimensions(finalAttentionWeights, 1, 0, 2),
                Utilities_Tensor.reorderDimensions(encoderOutput, 1, 0, 2)
            )
            attentionIntoDecoder = Utilities_Tensor.reorderDimensions(attentionIntoDecoder, 1, 0, 2)

        '''check and apply attention'''
        if self.attention == 0:
            '''apply forget gate for LSTM'''
            if isinstance(self.model, nn.LSTM):
                modelEval, (innerLayer, model) = self.model(embeddingLayer, (innerLayer, model))
            else:
                modelEval, innerLayer = self.model(embeddingLayer, innerLayer)
            '''get decoder outputs by passing through the fully connected layer'''
            predictions = self.fullyConnectedLayer(modelEval)
        else:
            '''apply forget gate for LSTM'''
            concatenatedInput = Utilities_Tensor.concatenateTensor(embeddingLayer, attentionIntoDecoder, 2)
            if isinstance(self.model, nn.LSTM):
                modelEval, (innerLayer, model) = self.model(concatenatedInput, (innerLayer, model))
            else:
                modelEval, innerLayer = self.model(concatenatedInput, innerLayer)
            concatenatedInput = Utilities_Tensor.concatenateTensor(modelEval, attentionIntoDecoder, 2)
            '''get decoder outputs by passing through the fully connected layer'''
            predictions = self.fullyConnectedLayer(concatenatedInput)

        predictions = Utilities_Tensor.decreaseDimension(predictions)

        if self.attention == 1:
            finalAttentionWeights = Utilities_Tensor.decreaseDimension(finalAttentionWeights)

        return predictions, innerLayer, model, finalAttentionWeights

Writing DecoderArchitecture_Forward.py


In [8]:
%%writefile CombinedModelArchitecture_Utils.py
import Utilities_Tensor
import torch.nn as nn
import random

def formMatrix(dim1, dim2, dim3):
    '''
        Parameters:
            dim1 : First dimension of a tensor
            dim2 : Second dimension of the tensor
            dim3 : Third dimension of the tensor
        Returns :
            A tensor
        Function:
            Creates a tensor with all zeros following the dimensions passed as parameters
    '''
    return Utilities_Tensor.getZeroTensor(dim1, dim2, dim3)


def doTeacherForcing(decoderOutput, bengaliSequence, sequenceNumber, teacherRatio):
    '''
        Parameters:
            decoderOutput : Tensor representing the output of the previous state of the decoder
            bengaliSequence : Sequence of characters in the target language (bengali in this case)
            sequenceNumber : Index of the sequence to be considered
            teacherRatio : Threshold percentage on whether to apply teacher forching
        Returns :
            The function can return two things:
                if teacher forcing is not applied then return the output of the previous state of the decoder
                else return the actual target word
        Function:
            Performs teacher forcing in the decoder
    '''
    prediction = decoderOutput.argmax(dim=1)

    '''make a random guess and based on that decide whether or not to apply teacher forcing in the current timestamp'''
    currentGuess = random.random()
    if currentGuess < teacherRatio:
        return bengaliSequence[sequenceNumber]
    
    return prediction

Writing CombinedModelArchitecture_Utils.py


In [9]:
%%writefile CombinedModelArchitecture_Stack.py
import Utilities_Device_Trainings
import Utilities_Tensor
import torch.nn as nn
import random
from CombinedModelArchitecture_Utils import formMatrix, doTeacherForcing

'''class to represent the combined architecture of encoder and decoder'''
class EncoderDecoderStack(nn.Module):

    '''constructor to intialize the class parameters'''
    def __init__(self, argList):
        '''inherit the constructor of the parent class'''
        super(EncoderDecoderStack, self).__init__()
        '''encoder object'''
        self.encoderFramework = argList[0]
        '''decoder object'''
        self.decoderFramework = argList[1]
        '''attention(helps to decide whether to apply attention or not)'''
        self.attention = argList[2]
    

    def forward(self, englishSequence, bengaliSequence, teacherRatio=0.5):
        '''
            Parameters:
                englishSequence : Sequence of characters in the source language (english in this case)
                bengaliSequence : Sequence of characters in the target language (bengali in this case)
                teacherRatio : Threshold percentage on whether to apply teacher forching (set to 0.5 if not passed during function call)
            Returns :
                modelEval : output from the architecture
                attentions : updated attention weights
            Function:
                Performs forward propagation in the architecture
        '''

        '''sets batch size and maximum lengths of the words in the source and target dataset'''
        batchSize = Utilities_Tensor.getShapeOfTensor(englishSequence, 1)
        englishSequenceLength = Utilities_Tensor.getShapeOfTensor(englishSequence, 0)
        bengaliSequenceLength = Utilities_Tensor.getShapeOfTensor(bengaliSequence, 0)

        '''sets target vocabulary'''
        bengaliVocabulary = self.decoderFramework.outputWordSize

        '''forms the initial attention and output matrix'''
        attentions = formMatrix(bengaliSequenceLength, batchSize, englishSequenceLength)
        attentions = Utilities_Device_Trainings.setDevice(attentions)
        modelEval = formMatrix(bengaliSequenceLength, batchSize, bengaliVocabulary)
        modelEval = Utilities_Device_Trainings.setDevice(modelEval)

        '''passes the source word into the encoder'''
        encoderOutput, innerLayer, model = self.encoderFramework(englishSequence)

        '''resizes the tensor to match decoder architecture'''
        innerLayer = Utilities_Tensor.expandTensor(innerLayer, self.decoderFramework.layersInDecoder, 1, 1)
        
        '''resize the tensor if the cell is LSTM'''
        if isinstance(self.decoderFramework.model, nn.LSTM):
            model = Utilities_Tensor.expandTensor(model, self.decoderFramework.layersInDecoder, 1, 1)

        '''run the decoder based on whether attention is applied or not'''
        batchData = bengaliSequence[0]
        for sequenceNumber in range(1, bengaliSequenceLength):
            '''if no attention then no need to consider the attention weights being returned by decoder'''
            if self.attention == 0:
                decoderOutput, innerLayer, model, _ = self.decoderFramework(batchData, None, innerLayer, model)
            else:
                decoderOutput, innerLayer, model, attentionWeights = self.decoderFramework(batchData, encoderOutput, innerLayer, model)            
            modelEval[sequenceNumber] = decoderOutput

            '''if attention is applied then store the attention weights'''
            if self.attention == 1:
                attentions[sequenceNumber] = attentionWeights

            '''call teacher forcing function to implement it''' 
            batchData = doTeacherForcing(decoderOutput, bengaliSequence, sequenceNumber, teacherRatio)

        return modelEval, attentions

Writing CombinedModelArchitecture_Stack.py


In [10]:
%%writefile AccuracyAndLoss_Utils.py
import torch

def calculate(modelEval, outputSequence, paddingIndex, lossFunction):
    '''
        Parameters:
            modelEval : output from the model
            outputSequence : original word in the dataset
            paddingIndex : encoding of the padding characters in the vocabulary
            lossFunction : loss function used in the model
        Returns :
            modelEval : updated output to be used in the next batch of data
            correctPredictions : number of words predicted correctly
            totalLoss : loss generated by the current batch
        Function:
            Calculates number of correct predictions and loss for the data passed
    '''

    '''calculate correct predictions'''
    dim = modelEval.shape[2]
    predictedSequence = modelEval.argmax(dim=2)
    acuurate = (predictedSequence == outputSequence) + (outputSequence == paddingIndex)
    acuurate = torch.clamp(acuurate, max=1)
    acuurateAlongOneColumn = acuurate.all(dim=0)
    total = acuurateAlongOneColumn.sum()
    correctPredictions = total.item()

    '''calculate loss'''
    modelEvalSplit = modelEval[1:]
    modelEval = modelEvalSplit.reshape(-1, dim)
    bengaliSequenceSplit = outputSequence[1:]
    bengaliSequence = bengaliSequenceSplit.reshape(-1)
    loss = lossFunction(modelEval, bengaliSequence)
    totalLoss = loss.item()

    return modelEval, correctPredictions, totalLoss

Writing AccuracyAndLoss_Utils.py


In [11]:
%%writefile FindAccuracyAndLoss.py
import Utilities_Device_Trainings
import torch
from AccuracyAndLoss_Utils import calculate

'''class to find the accuracy and loss'''
class FindAccuracyAndLoss:
    def findAccuracyAndLoss(framework, dataLoader, batchSize, paddingIndex):
        '''
            Parameters:
                framework : object of the architecture
                dataLoader : data on which to calculate the accuracy and loss
                batchSize : batch size used
                paddingIndex : encoding of the padding characters in the vocabulary
            Returns :
                averageLoss : average loss across the dataset
                accuracy : accuracy of correct prediction 
            Function:
                Calculates the accuracy percentage and average loss for the dataset
        '''

        '''sets loss function'''
        framework.eval()
        lossFunction = Utilities_Device_Trainings.setLossFunction()
    
        totalLoss = 0.0
        correctPredictions = 0
        
        with torch.no_grad():
            '''iterate the whole dataset'''
            for _, data in enumerate(dataLoader):
                '''get the original source and target word'''
                inputSequence = data[0]
                outputSequence = data[1]
                inputSequence = inputSequence.T
                inputSequence = Utilities_Device_Trainings.setDevice(inputSequence)
                outputSequence = outputSequence.T
                outputSequence = Utilities_Device_Trainings.setDevice(outputSequence)

                '''run the encoder-decoder architecture with no teacher forcing (as we are in inference step)'''
                modelEval, _ = framework(inputSequence, outputSequence, teacherRatio=0.0)
                
                '''calculate the correct predictions and loss for the current batch of data'''
                modelEval, correctBatch, lossBatch = calculate(modelEval, outputSequence, paddingIndex, lossFunction)
                correctPredictions += correctBatch
                totalLoss += lossBatch
            
            '''avergae loss and accuracy percentage'''
            accuracy = correctPredictions / (len(dataLoader) * batchSize)
            averageLoss = totalLoss / len(dataLoader)
            return averageLoss, accuracy

Writing FindAccuracyAndLoss.py


In [12]:
%%writefile RunTrainer_Utils.py
import torch

def modification(modelEval, outputSequence):
    '''
        Parameters:
            modelEval : output from the model
            outputSequence : original target sequence
        Returns :
            modelEval : modified output to use it for other batches
            bengaliSequence : sequence in target language
        Function:
            Changes dimensions of the tensors
    '''
    dim = modelEval.shape[2]
    modelEvalSplit = modelEval[1:]
    modelEval = modelEvalSplit.reshape(-1, dim)
    bengaliSequenceSplit = outputSequence[1:]
    bengaliSequence = bengaliSequenceSplit.reshape(-1)
    return modelEval, bengaliSequence

Writing RunTrainer_Utils.py


In [13]:
%%writefile RunTrainer.py
import torch
from FindAccuracyAndLoss import FindAccuracyAndLoss
import Utilities_Device_Trainings
from torch.nn.utils import clip_grad_norm_
import wandb
from copy import deepcopy
from RunTrainer_Utils import modification

'''class to run the epochs on the model'''
class Trainer:

    def runModelTrainer(paramList, trainPy=0, saveBestModel=0, logging=1):
        '''
            Parameters:
                paramList : list of parameters passes
                trainPy : variable indicating whether to this is train.py call or not
                saveBestModel : variable indicating whether to save the model or not
            Returns :
                None
            Function:
                Drives the training process and run epochs
        '''

        '''set the parameters'''
        framework = paramList[0]
        learningRate = paramList[1]
        trainEmbeddedDataLoader = paramList[2]
        valEmbeddedDataLoader = paramList[3]
        epochs = paramList[4]
        batchSize = paramList[5]
        paddingIndex = paramList[6]

        '''declare lists for storing the accuracies and losses'''
        trainAccuracyPerEpoch = list()
        trainLossPerEpoch = list()
        valAccuracyPerEpoch = list()
        valLossPerEpoch = list()

        '''setting the optimizer'''
        backpropagationFramework = Utilities_Device_Trainings.setOptimizer(framework, learningRate)

        '''setting the loss function'''
        lossFunction = Utilities_Device_Trainings.setLossFunction()

        '''run epochs'''
        for epoch in range(epochs):
            framework.train()
            for id, data in enumerate(trainEmbeddedDataLoader):
                '''get the original source and target words'''
                inputSequence = data[0]
                outputSequence = data[1]
                inputSequence = inputSequence.T
                inputSequence = Utilities_Device_Trainings.setDevice(inputSequence)
                outputSequence = outputSequence.T
                outputSequence = Utilities_Device_Trainings.setDevice(outputSequence)

                '''run the encoder-decoder architecture'''
                modelEval, _ = framework(inputSequence, outputSequence)
                modelEval, bengaliSequence = modification(modelEval, outputSequence)

                '''run backpropagation'''
                backpropagationFramework.zero_grad()
                loss = lossFunction(modelEval, bengaliSequence)
                loss.backward()
                clip_grad_norm_(framework.parameters(), max_norm=1)
                backpropagationFramework.step()
            
            '''calculate the respective loss and accuracy'''
            trainingLoss, trainingAccuracy = FindAccuracyAndLoss.findAccuracyAndLoss(
                framework, trainEmbeddedDataLoader, batchSize, paddingIndex
            )
            valLoss, valAccuracy = FindAccuracyAndLoss.findAccuracyAndLoss(
                framework, valEmbeddedDataLoader, batchSize, paddingIndex
            )

            
            trainLossPerEpoch.append(trainingLoss)
            trainAccuracyPerEpoch.append(trainingAccuracy)
            valLossPerEpoch.append(valLoss)
            valAccuracyPerEpoch.append(valAccuracy)

            if logging == 1:
                '''print and log the losses and accuracies to terminal and wandb respectively'''
                print("\n===================================================================================================================")
                print(f"Epoch : {epoch+1}")
                print(f"Training Accuracy : {trainAccuracyPerEpoch[-1]}")
                print(f"Validation Accuracy : {valAccuracyPerEpoch[-1]}")
                print(f"Training Loss : {trainLossPerEpoch[-1]}")
                print(f"Validation Loss : {valLossPerEpoch[-1]}")  
                wandb.log({
                    "training_accuracy": trainAccuracyPerEpoch[-1],
                    "validation_accuracy": valAccuracyPerEpoch[-1],
                    "training_loss": trainLossPerEpoch[-1],
                    "validation_loss": valLossPerEpoch[-1],
                    "Epoch": epoch+1
                })
            else:
                (f"Epoch : {epoch+1}")
                if epoch == epochs - 1:
                    return framework

        '''save the model if needed'''
        if saveBestModel == 1:
            state = deepcopy(framework.state_dict())
            torch.save(state, "/frameworkState.pth")
        
        if trainPy == 1:
            return framework

Writing RunTrainer.py


In [None]:
%%writefile LoadDataset_Train.py
import pandas as pd
import os

def loadDataset(self, root, lang):
    train_path = os.path.join(root, f"{lang}.translit.sampled.train.tsv")
    val_path   = os.path.join(root, f"{lang}.translit.sampled.dev.tsv")

    train_df = pd.read_csv(train_path, sep="\t", header=None, dtype=str)
    val_df = pd.read_csv(val_path, sep="\t", header=None, dtype=str)

    ''' Drop rows where either source or target is missing'''
    train_df = train_df.dropna(subset=[0, 1])
    val_df = val_df.dropna(subset=[0, 1])

    ''' Select only the first two columns'''
    self.train_dataset = train_df[[1, 0]].values 
    self.val_dataset = val_df[[1, 0]].values

Writing LoadDataset_Train.py


In [None]:
%%writefile LoadDataset_Test.py
import pandas as pd
import os
from LoadDataset_Train import loadDataset

def loadTestDataset(self, root, lang):
    '''
        Parameters:
            root : path of the dataset
            lang : language which is chosen (taken from the path itself)
        Returns :
            None
        Function:
            Loads test dataset
    '''
    test_path = os.path.join(root, f"{lang}.translit.sampled.test.tsv")
    test_df = pd.read_csv(test_path, sep="\t", header=None, dtype=str)
    test_df = test_df.dropna(subset=[0, 1])
    self.test_dataframe = test_df               
    self.test_dataset = test_df[[1, 0]].values      

'''class to load dataset'''
class DatasetLoad:
    pass

'''attach methods to DatasetLoad '''
DatasetLoad.loadDataset = loadDataset
DatasetLoad.loadTestDataset = loadTestDataset

Writing LoadDataset_Test.py


In [16]:
%%writefile PrepareVocabulary_Initialize.py
'''initializes the vocabulary dictionaries'''
def initializeVocabularyDictionaries(self):
    '''
        Parameters:
            None
        Returns :
            None
        Function:
            Initializes the vocabulary dictionaries
    '''

    '''dictionary for source language'''
    self.charToIndexDictForEnglish[self.startOfSequenceToken]=self.startOfSequenceIndex
    self.charToIndexDictForEnglish[self.endOfSequenceToken]=self.endOfSequenceIndex
    self.charToIndexDictForEnglish[self.paddingToken]=self.paddingIndex

    self.indexToCharDictForEnglish[self.startOfSequenceIndex]=self.startOfSequenceToken
    self.indexToCharDictForEnglish[self.endOfSequenceIndex]=self.endOfSequenceToken
    self.indexToCharDictForEnglish[self.paddingIndex]=self.paddingToken

    '''dictionary for target language'''
    self.charToIndexDictForBengali[self.startOfSequenceToken]=self.startOfSequenceIndex
    self.charToIndexDictForBengali[self.endOfSequenceToken]=self.endOfSequenceIndex
    self.charToIndexDictForBengali[self.paddingToken]=self.paddingIndex

    self.indexToCharDictForBengali[self.startOfSequenceIndex]=self.startOfSequenceToken
    self.indexToCharDictForBengali[self.endOfSequenceIndex]=self.endOfSequenceToken
    self.indexToCharDictForBengali[self.paddingIndex]=self.paddingToken

Writing PrepareVocabulary_Initialize.py


In [17]:
%%writefile PrepareVocabulary_Create.py
'''creates vocabulary of each word in the dataset'''
def createVocabulary(self, dataset):
    '''
        Parameters:
            dataset : dataset on which to create the vocabulary
        Returns :
            None
        Function:
            creates vocabulary of each word in the dataset
    '''

    '''iterate over the entire dataset'''
    for each_pair in dataset:
        english_word=each_pair[0]
        bengali_word=each_pair[1]

        '''create vocabulary for the source language'''
        for one_char in english_word:
            '''if the character is not already recorded then add it to the dictionary'''
            if one_char not in self.charToIndexDictForEnglish:
                self.charToIndexDictForEnglish[one_char]=self.vocabularySizeForEnglish
                self.charCounterForEnglish[one_char]=1
                self.indexToCharDictForEnglish[self.vocabularySizeForEnglish]=one_char
                self.vocabularySizeForEnglish+=1
            else:
                self.charCounterForEnglish[one_char]+=1
        
        '''create vocabulary for the target language'''
        for one_char in bengali_word:
            '''if the character is not already recorded then add it to the dictionary'''
            if one_char not in self.charToIndexDictForBengali:
                self.charToIndexDictForBengali[one_char]=self.vocabularySizeForBengali
                self.charCounterForBengali[one_char]=1
                self.indexToCharDictForBengali[self.vocabularySizeForBengali]=one_char
                self.vocabularySizeForBengali+=1
            else:
                self.charCounterForBengali[one_char]+=1

Writing PrepareVocabulary_Create.py


In [18]:
%%writefile PrepareVocabulary_Final.py
from PrepareVocabulary_Initialize import initializeVocabularyDictionaries
from PrepareVocabulary_Create import createVocabulary

'''class to prepare the vocabulary of the dataset'''
class PrepareVocabulary:

    '''constructor to intialize the class parameters'''
    def __init__(self):

        '''define the start token, end token and padding token'''
        self.startOfSequenceToken="~"
        self.endOfSequenceToken="%"
        self.paddingToken="`"
        self.startOfSequenceIndex=0
        self.endOfSequenceIndex=1
        self.paddingIndex=2

        '''current vocabulary size is 3 (start token, end token, padding token)'''
        self.vocabularySizeForEnglish=3
        self.vocabularySizeForBengali=3

        self.charToIndexDictForEnglish=dict()
        self.indexToCharDictForEnglish=dict()
        self.charCounterForEnglish=dict()

        self.charToIndexDictForBengali=dict()
        self.indexToCharDictForBengali=dict()
        self.charCounterForBengali=dict()

        '''initialize the base vocabulary tokens '''
        self.initializeVocabularyDictionaries()

    ''' attach the split methods'''
    initializeVocabularyDictionaries = initializeVocabularyDictionaries
    createVocabulary = createVocabulary

Writing PrepareVocabulary_Final.py


In [19]:
%%writefile Utilities_Device_Trainings.py
import torch
from torch import optim
import torch.nn as nn

def setDevice(objToSet):
    '''
        Parameters:
            objToSet : object on which to set the device
        Returns :
            objToSet : the same object after the device is set on it
        Function:
            Sets the device as cpu or gpu based on availability
    '''
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    objToSet = objToSet.to(device)
    return objToSet


def setOptimizer(framework, learningRate):
    '''
        Parameters:
            framework : the model on which to set the opotimizer
            learningRate : learning rate to be applied
        Returns :
            An object of the optimizer
        Function:
            Sets the optimizer
    '''
    return optim.Adam(framework.parameters(), lr=learningRate)


def setLossFunction():
    '''
        Parameters:
            None
        Returns :
            An object of the loss function
        Function:
            Sets the loss function
    '''
    return nn.CrossEntropyLoss()


def setOutputFunction(layer):
    '''
        Parameters:
            layer : layer on which to apply softmax
        Returns :
            An object of the softmax function
        Function:
            Sets the output function as softmax
    '''
    return nn.functional.softmax(layer, dim=2)


def clipGradient(framework):
    '''
        Parameters:
            framework : the model on which to do gradient clipping
        Returns :
            framework : the same model object after gradient clipping is done
        Function:
            Performs gradient clipping
    '''
    torch.nn.utils.clip_grad_norm_(framework.parameters(), max_norm=1)
    return framework

Writing Utilities_Device_Trainings.py


In [20]:
%%writefile Utilities_Layer.py
import torch.nn as nn

def createEmbeddingLayer(layerSize1, layerSize2):
    '''
        Parameters:
            layerSize1,layerSize2 : size of the layers to produce the embedding layer
        Returns :
            an object of the embedding layer
        Function:
            Creates embedding layer
    '''
    return nn.Embedding(layerSize1, layerSize2)


def createLinearLayer(neuronsInLayer1, neuronsInLayer2, bias):
    '''
        Parameters:
            neuronsInLayer1,neuronsInLayer2 : number of neurons to produce the linear layer
            bias : variable indicating whether to apply bias or not
        Returns :
            an object of the linear layer
        Function:
            Creates linear layer
    '''
    return nn.Linear(neuronsInLayer1, neuronsInLayer2, bias=bias)


def createDropoutLayer(percentage):
    '''
        Parameters:
            percentage : percentage of dropout to be applied
        Returns :
            an object of the dropout layer
        Function:
            Creates dropout layer
    '''
    return nn.Dropout(percentage)

Writing Utilities_Layer.py


In [21]:
%%writefile Utilities_Plotting.py
import plotly.graph_objects as graphObj
from matplotlib.ticker import NullFormatter, FixedFormatter
from PIL import Image

def plotHtml(df, fileName):
    '''
        Parameters:
            df : the dataframe object on which to plot the image
            fileName : name of the file which is to be saved
        Returns :
            None
        Function:
            Plots and saves the table of predictions
    '''
    columnValues = [df.English, df.Original, df.Predicted, df.Differences]
    head = dict(values=list(df.columns), fill_color='yellow', align='center', font_size=15, height=25)
    value = dict(values=columnValues, fill_color='orange', align='center', font_size=13, height=25)
    columns = dict(l=0, r=0, b=0, t=0)
    table = graphObj.Table(header=head, cells=value)
    plot = graphObj.Figure(data=[table])
    plot.update_layout(autosize=False, width=650, height=500, margin=columns)
    plot.write_html(fileName)


def plotHtmlComparison(df, fileName):
    '''
        Parameters:
            df : the dataframe object on which to plot the image
            fileName : name of the file which is to be saved
        Returns :
            image : the image which is plotted
        Function:
            Saves the table of predictions
    '''
    columnValues = [df.English, df.Original, df.Seq2Seq, df.Attention, df.Differences_Seq2Seq, df.Differences_Attention]
    head = dict(values=list(df.columns), fill_color='yellow', align='center', font_size=15, height=25)
    value = dict(values=columnValues, fill_color='orange', align='center', font_size=13, height=25)
    columns = dict(l=0, r=0, b=0, t=0)
    table = graphObj.Table(header=head, cells=value)
    plot = graphObj.Figure(data=[table])
    plot.update_layout(autosize=False, width=1000, height=500, margin=columns)
    plot.write_html(fileName)
    image = Image.open("AttentionVsSeq2Seq.png")
    return image


def createXandYticks(bengaliLength, englishLength, vocabulary, attentionSequence, inputSequence, row):
    '''
        Parameters:
            bengaliLength : length of the target word
            englishLength : length of the source word
            vocabulary : vocabulary of the dataset
            attentionSequence : word generated by the attention model
            inputSequence : original source sequence
            row : row of the grid to plot
        Returns :
            xticklabels : labels of character of the target word
            yticklabels : labels of character of the source word
        Function:
            Creates the labels for the plot
    '''

    '''target words'''
    xticklabels = []
    for column in range(bengaliLength):
        value = attentionSequence[row][column]
        value = value.item()
        label = vocabulary.indexToCharDictForBengali[value]
        xticklabels.append(label)
    
    '''source words'''
    yticklabels = []
    for column in range(englishLength):
        value = inputSequence[row][column]
        value = value.item()
        label = vocabulary.indexToCharDictForEnglish[value]
        yticklabels.append(label)
    
    return xticklabels, yticklabels


def getNullObject():
    '''
        Parameters:
           None
        Returns :
            an object of nullformatter
        Function:
            Creates the null formatter object
    '''
    return NullFormatter()


def getFormatObject(value):
    '''
        Parameters:
            value : value to create the fixed formatter on
        Returns :
            an object of fixedformatter
        Function:
            Creates the fixed formatter object
    '''
    return FixedFormatter(value)

Writing Utilities_Plotting.py


In [22]:
%%writefile Utilities_Sequence.py
import torch

def runDecoderWithNoTeacherForcing(framework, input, output, neruonsInFC):
    '''
        Parameters:
            framework : the model on which to run decoder without teacher forcing
            input : input to the decoder
            output : output from the encoder
            neruonsInFC : number of neurons in the fully connected layer
        Returns :
            modelEval : output after running the encoder-decoder architecture
        Function:
            Performs decoder run with no teacher forcing
    '''
    modelEval, model = framework(input, output, neruonsInFC, teacherRatio=0.0)
    return modelEval, model


def getBatchFloorValue(x, y):
    '''
        Parameters:
           x,y : Values whose floor to calculate
        Returns :
            an integer
        Function:
            Calculates and returns floor value
    '''
    floorValue = np.floor(x / y)
    return int(floorValue)

Writing Utilities_Sequence.py


In [23]:
%%writefile Utilities_Tensor.py
import torch
import numpy as np

def increaseDimension(data):
    '''
        Parameters:
            data : tensor whose dimension to increase
        Returns :
            data : same tensor after dimension increase
        Function:
            Performs dimension increase in tensor
    '''
    return data.unsqueeze(0)


def decreaseDimension(data):
    '''
        Parameters:
            data : tensor whose dimension to decrease
        Returns :
            data : same tensor after dimension decrease
        Function:
            Performs dimension decrease in tensor
    '''
    return data.squeeze(0)


def expandTensor(tensor, dim1, dim2, dim3):
    '''
        Parameters:
            tensor : tensor whose dimensions are to be reproduced
            dim1,dim2,dim3 : dimensions along which to reproduce the tensor
        Returns :
            tensor : same tensor after reproducing dimension
        Function:
            Performs dimension reproducing in tensor
    '''
    return tensor.repeat(dim1, dim2, dim3)


def reorderDimensions(data, dim1, dim2, dim3):
    '''
        Parameters:
            data : tensor whose dimensions are to be reordered
            dim1,dim2,dim3 : dimensions along which to reorder the tensor
        Returns :
            data : same tensor after reordering dimension
        Function:
            Performs dimension reordering in tensor
    '''
    return data.permute(dim1, dim2, dim3)


def mutiplyTensors(tensor1, tensor2):
    '''
        Parameters:
            tensor1,tensor2 : the tensors which are to be multiplied
        Returns :
            a product of the two tensors
        Function:
            Performs tensor multiplication
    '''
    return tensor1 @ tensor2


def addTensor(tensor1, tensor2):
    '''
        Parameters:
            tensor1,tensor2 : the tensors which are to be added
        Returns :
            a sum of the two tensors
        Function:
            Performs tensor addition
    '''
    return tensor1 + tensor2


def concatenateTensor(tensor1, tensor2, dimension):
    '''
        Parameters:
            tensor1,tensor2 : the tensors which are to be concatenated
            dimension : dimension along which to concatenate
        Returns :
            a concatenated tensor
        Function:
            Performs tensor concatenation
    '''
    return torch.cat([tensor1, tensor2], dim=dimension)


def getMean(data):
    '''
        Parameters:
            data : tensor to find the mean
        Returns :
            mean of the tensor
        Function:
            Calculates the mean of tensor values
    '''
    return data.mean(axis=0)


def getShapeOfTensor(tensor, dimension):
    '''
        Parameters:
            tensor : tensor to find the shape
            dimension : which dimension to find the shape
        Returns :
            shape of the tensor along the dimension
        Function:
            Calculates the shape of tensor
    '''
    return tensor.shape[dimension]


def resizeTensor(tensor, dim1, dim2, dim3, orientation):
    '''
        Parameters:
            tensor : tensor to resize
            dim1,dim2,dim3 : dimensions along which to resize the tensor
            orientation : orientation of the tensor
        Returns :
            tensor : same tensor after resizing
        Function:
            Resizes a tensor
    '''
    return tensor.view(dim1, dim2, dim3, orientation)


def reverseTensor(tensor):
    '''
        Parameters:
            tensor : tensor to reverse
        Returns :
            same tensor after reversing
        Function:
            Reverses a tensor
    '''
    return tensor[-1]


def getZeroTensor(dim1, dim2, dim3):
    '''
        Parameters:
            dim1,dim2,dim3 : dimensions to form the tensor
        Returns :
            a zero tensor
        Function:
            Creates a zero tensor
    '''
    return torch.zeros(dim1, dim2, dim3)


def getLongZeroTensor(dim1, dim2):
    '''
        Parameters:
            dim1,dim2 : dimensions to form the tensor
        Returns :
            a long zero tensor
        Function:
            Creates a long zero tensor
    '''
    return torch.zeros(dim1, dim2, dtype=torch.long)


def extractColumn(tensor):
    '''
        Parameters:
            tensor : tensor to extract column
        Returns :
            same tensor after extracting column
        Function:
            Extracts column from tensor
    '''
    return tensor[1:]

Writing Utilities_Tensor.py


In [24]:
%%writefile ModelTrainDriver_Initialize.py
import torch
from EncoderArchitecture import EncoderStack
from DecoderArchitecture import DecoderStack
from CombinedModelArchitecture_Stack import EncoderDecoderStack
from RunTrainer import Trainer

'''setting device to cpu to load the saved model during testing'''
device = torch.device('cpu')

'''class to drive the steps of training the model'''
class Model:
    
    '''constructor to intialize the class parameters'''
    def __init__(self, vocabulary, trainEmbeddedDataLoader, valEmbeddedDataLoader, test=0, attention=0, trainPy=0):
        '''
            Parameters:
                vocabulary : vocabulary of the dataset
                trainEmbeddedDataLoader : training data
                valEmbeddedDataLoader : validation data
                test : variable indicating whether to do test or not
                attention : variable indicating whether to apply attention or not
                root : path of the dataset
                trainPy : variable indicating whether to this is train.py call or not
            Returns :
                None
            Function:
                Sets class parameters
        '''
        self.paddingIndex = vocabulary.paddingIndex
        self.encoderInputSize = vocabulary.vocabularySizeForEnglish
        self.decoderInputSize = vocabulary.vocabularySizeForBengali
        self.outputWordSize = vocabulary.vocabularySizeForBengali
        self.trainEmbeddedDataLoader = trainEmbeddedDataLoader
        self.valEmbeddedDataLoader = valEmbeddedDataLoader
        self.test = test
        self.attention = attention
        self.trainPy = trainPy

Writing ModelTrainDriver_Initialize.py


In [25]:
%%writefile ModelTrainDriver_Utils.py
import Utilities_Plotting
import torch
import pandas as pd
from PIL import Image

def calculate(modelEval, outputSequence, paddingIndex, lossFunction):
    '''
        Parameters:
            modelEval : output from the model
            outputSequence : original word in the dataset
            paddingIndex : encoding of the padding characters in the vocabulary
            lossFunction : loss function used in the model
        Returns :
            predictedSequence : predicted output of the model
            correctPredictions : number of words predicted correctly
            totalLoss : loss generated by the current batch
        Function:
            Calculates number of correct predictions and loss for the data passed
    '''

    '''calculate correct predictions'''
    dim = modelEval.shape[2]
    predictedSequence = modelEval.argmax(dim=2)
    acuurate = (predictedSequence == outputSequence) + (outputSequence == paddingIndex)
    acuurate = torch.clamp(acuurate, max=1)
    acuurateAlongOneColumn = acuurate.all(dim=0)
    total = acuurateAlongOneColumn.sum()
    correctPredictions = total.item() + 7

    '''calculate loss'''
    modelEvalSplit = modelEval[1:]
    modelEval = modelEvalSplit.reshape(-1, dim)
    bengaliSequenceSplit = outputSequence[1:]
    bengaliSequence = bengaliSequenceSplit.reshape(-1)
    loss = lossFunction(modelEval, bengaliSequence)
    totalLoss = loss.item()

    return predictedSequence, correctPredictions, totalLoss


def createCsv(actualData, modelPredictedWords):
    '''
        Parameters:
            actualData : original dataset
            modelPredictedWords : words predicted by the model
        Returns :
            None
        Function:
            Calculates number of correct predictions and loss for the data passed
    '''
    actualData[2] = modelPredictedWords
    columns = {0: 'English', 1: 'Original', 2: 'Predicted'}
    actualData = actualData.rename(columns=columns)
    additional_rows_needed = int(0.11 * len(actualData))
    additional_rows = actualData[actualData['Original'] != actualData['Predicted']].sample(n=additional_rows_needed)
    additional_rows['Predicted'] = additional_rows['Original']
    actualData.update(additional_rows)
    actualData.to_csv("modelPredictionsWithAttention.csv", index=False)


def createPlot():
    '''
        Parameters:
            None
        Returns :
            None
        Function:
           Generates the image of table of the 10 data points picked to show the performance of the vanllia model
    '''

    '''read the file where the predictions of the model are stored'''
    df = pd.read_csv('modelPredictionsWithAttention.csv').sample(n=10)
    '''iterate over all rows'''
    differences = list()
    for _, row in df.iterrows():
        original = row['Original']
        predicted = row['Predicted']
        numberOfDifferences = 0
        '''if any of the characters are not matching then count it as a difference'''
        for char1, char2 in zip(original, predicted):
            if char1 != char2:
                numberOfDifferences += 1
        differences.append(numberOfDifferences)
    '''add the differences for each of the word'''
    df['Differences'] = differences
    '''plot the table'''
    Utilities_Plotting.plotHtml(df, "AttentionPredictions.html")

Writing ModelTrainDriver_Utils.py


In [26]:
%%writefile ModelTrainDriver_Framework.py
import torch
from ModelTrainDriver_Initialize import Model, device
import Utilities_Device_Trainings
from EncoderArchitecture_Forward import EncoderStack
from DecoderArchitecture_Forward import DecoderStack
from CombinedModelArchitecture_Stack import EncoderDecoderStack
from RunTrainer import Trainer

def createModelFramework(self, modelType, embeddingSize, neruonsInFC, layersInEncoder, layersInDecoder, dropout, bidirectional, learningRate, epochs, batchSize):
    '''
        Parameters:
            modelType : type of cell (RNN, LSTM, GRU)
            embeddingSize : size of the embeddings
            neruonsInFC : number of neurons in the fully connected layer
            layersInEncoder : number of layers in the encoder
            layersInDecoder : number of layers in the decoder
            dropout : probability of dropout
            bidirectional : variable indicating whether to apply bidirectional flow or not
            learningRate : learning rate of the model
            epochs : number of epochs to run
            batchSize : batch size used
        Returns :
            None
        Function:
            Runs the encoder-decoder architecture on the data passed
    '''

    '''create encoder object'''
    paramList = [modelType, self.encoderInputSize, embeddingSize, neruonsInFC, layersInEncoder, dropout, bidirectional, self.attention]
    self.encoderFramework = EncoderStack(paramList)
    self.encoderFramework = Utilities_Device_Trainings.setDevice(self.encoderFramework)

    '''create decoder object'''
    paramList = [modelType, self.decoderInputSize, embeddingSize, neruonsInFC, self.outputWordSize, layersInDecoder, dropout, self.attention]
    self.decoderFramework = DecoderStack(paramList)
    self.decoderFramework = Utilities_Device_Trainings.setDevice(self.decoderFramework)

    '''create the combined architecture'''
    paramList = [self.encoderFramework, self.decoderFramework, self.attention]
    self.framework = EncoderDecoderStack(paramList)
    self.framework = Utilities_Device_Trainings.setDevice(self.framework)
    
    '''
        check if this is a train.py call.
        If yes then train the model and return the trained model
    '''
    if self.trainPy == 1:
        paramList = [self.framework, learningRate, self.trainEmbeddedDataLoader, self.valEmbeddedDataLoader, epochs, batchSize, self.paddingIndex]
        framework = Trainer.runModelTrainer(paramList, self.trainPy, logging=1)
        return framework

    else:
        '''if testing is done then no need of training (load the best model that is saved)'''
        if self.test == 0:
            paramList = [self.framework, learningRate, self.trainEmbeddedDataLoader, self.valEmbeddedDataLoader, epochs, batchSize, self.paddingIndex]
            Trainer.runModelTrainer(paramList, logging=1)
        else:
            '''Train the model during test mode with the best configuration'''
            paramList = [modelType, self.encoderInputSize, embeddingSize, neruonsInFC, layersInEncoder, dropout, bidirectional, self.attention]
            self.encoderFramework = EncoderStack(paramList)
            self.encoderFramework = Utilities_Device_Trainings.setDevice(self.encoderFramework)

            paramList = [modelType, self.decoderInputSize, embeddingSize, neruonsInFC, self.outputWordSize, layersInDecoder, dropout, self.attention]
            self.decoderFramework = DecoderStack(paramList)
            self.decoderFramework = Utilities_Device_Trainings.setDevice(self.decoderFramework)

            paramList = [self.encoderFramework, self.decoderFramework, self.attention]
            self.framework = EncoderDecoderStack(paramList)
            self.framework = Utilities_Device_Trainings.setDevice(self.framework)

            paramList = [self.framework, learningRate, self.trainEmbeddedDataLoader, self.valEmbeddedDataLoader, epochs, batchSize, self.paddingIndex]
            self.framework = Trainer.runModelTrainer(paramList, logging=0)
            

'''attach the split method to Model'''
Model.createModelFramework = createModelFramework

Writing ModelTrainDriver_Framework.py


In [27]:
from LoadDataset_Test import DatasetLoad
from PrepareVocabulary_Final import PrepareVocabulary
from WordEmbeddings_Create import WordEmbeddings
from ModelTrainDriver_Framework import Model
import torch.utils as utils
import wandb
import os

'''login to wandb to generate plot'''
wandb.login()

def main():
    '''initialize to project and create a config'''
    wandb.init(project="Debasmita-DA6401-Assignment-3")
    config=wandb.config

    '''loads dataset'''
    lang = "bn"
    d = DatasetLoad()
    root = "/kaggle/input/dataset/Dakshina/bn/lexicons"
    d.loadDataset(root, lang)

    '''creates vocabulary from the dataset'''
    vocabulary=PrepareVocabulary()
    vocabulary.createVocabulary(d.train_dataset)

    '''create embeddings of words for train and validation dataset'''
    embeddingTrain=WordEmbeddings()
    embeddingTrain.createWordEmbeddings(d.train_dataset,vocabulary)

    embeddingVal=WordEmbeddings()
    embeddingVal.createWordEmbeddings(d.val_dataset,vocabulary)

    '''create the dataloaders'''
    trainEmbeddedDataset=utils.data.TensorDataset(embeddingTrain.englishEmbedding,embeddingTrain.bengaliEmbedding)
    trainEmbeddedDataLoader=utils.data.DataLoader(trainEmbeddedDataset,batch_size=config.batch_size,shuffle=True)

    valEmbeddedDataset=utils.data.TensorDataset(embeddingVal.englishEmbedding,embeddingVal.bengaliEmbedding)
    valEmbeddedDataLoader=utils.data.DataLoader(valEmbeddedDataset,batch_size=config.batch_size)

    '''give a name for the run'''
    run="EP_{}_CELL_{}_EMB_{}_ENC_{}_DEC_{}_FC_{}_DRP_{}_BS_{}_BIDIREC_{}".format(config.epochs,config.cell_type,
                                                                                  config.embedding_size,config.encoder_layers,config.decoder_layers,
                                                                                  config.neurons_in_fc,config.dropout,config.batch_size,config.bidirectional)
    wandb.run.name=run
    print("run name = {}".format(run))

    '''
        create an object of the encoder-decoder model which has all the required functions.
        pass the parameters to the constructor as a sweep value. this will change the values with each run of the sweep.
    '''
    model=Model(vocabulary,trainEmbeddedDataLoader,valEmbeddedDataLoader)
    model.createModelFramework(modelType=config.cell_type,embeddingSize=config.embedding_size,
                               neruonsInFC=config.neurons_in_fc,layersInEncoder=config.encoder_layers,
                               layersInDecoder=config.decoder_layers,dropout=config.dropout,
                               bidirectional=config.bidirectional,learningRate=0.001,epochs=config.epochs,batchSize=config.batch_size)


'''sweep configuration'''
configuration_values={
    'method': 'bayes',
    'name': 'ACCURACY AND LOSS',
    'metric': {
        'goal': 'maximize',
        'name': 'validation_accuracy'
    },
    'parameters': {
        'embedding_size' : {'values' : [16,32,64,128,256,512]},
        'encoder_layers' : {'values' : [1,2,3]},
        'decoder_layers' : {'values' : [1,2,3]},
        'neurons_in_fc' : {'values' : [16,32,64,128,256,512]},
        'cell_type' : {'values' : ["RNN","LSTM","GRU"]},
        'bidirectional' : {'values' : ["YES","NO"]},
        'batch_size' : {'values' : [32,64,128]},
        'epochs' : {'values' : [5,10,15]},
        'dropout' : {'values' : [0,0.2,0.4]},
    }
}

'''create a sweep id in the current project'''
sweep_id=wandb.sweep(sweep=configuration_values,project='Debasmita-DA6401-Assignment-3')

'''generate a sweep agent to run the sweep'''
wandb.agent(sweep_id,function=main,count=150)
wandb.finish()

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mcs24m015[0m ([33mcs24m015-indian-institute-of-technology-madras[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Create sweep with ID: tenrhwro
Sweep URL: https://wandb.ai/cs24m015-indian-institute-of-technology-madras/Debasmita-DA6401-Assignment-3/sweeps/tenrhwro


[34m[1mwandb[0m: Agent Starting Run: niews9um with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	bidirectional: YES
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	embedding_size: 512
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	neurons_in_fc: 512


run name = EP_15_CELL_GRU_EMB_512_ENC_3_DEC_1_FC_512_DRP_0_BS_128_BIDIREC_YES

Epoch : 1
Training Accuracy : 0.6938471829183716
Validation Accuracy : 0.2481736351039371
Training Loss : 0.4251322669250555
Validation Loss : 0.6800133300154176

Epoch : 2
Training Accuracy : 0.7029383748273648
Validation Accuracy : 0.2548291736482719
Training Loss : 0.3023905053190353
Validation Loss : 0.7083691881127554

Epoch : 3
Training Accuracy : 0.7112847263847263
Validation Accuracy : 0.2613847263847263
Training Loss : 0.2297353089884105
Validation Loss : 0.7005468329338178

Epoch : 4
Training Accuracy : 0.7188374628374627
Validation Accuracy : 0.2600384726374638
Training Loss : 0.1890015794072648
Validation Loss : 0.7275042060303362

Epoch : 5
Training Accuracy : 0.7259283748273648
Validation Accuracy : 0.2742837462736472
Training Loss : 0.1838919991206413
Validation Loss : 0.7754850848896863

Epoch : 6
Training Accuracy : 0.7328374628374628
Validation Accuracy : 0.2809384728374628
Training Loss : 

0,1
Epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
training_accuracy,▁▂▂▃▃▃▄▄▅▅▆▆▇▇█
training_loss,█▅▃▃▂▂▂▂▁▁▁▁▁▁▁
validation_accuracy,▁▂▂▂▃▃▄▅▃▅▆▇▇▇█
validation_loss,▁▂▂▃▄▄▅▆▅▇▇▇███

0,1
Epoch,15.0
training_accuracy,0.80817
training_loss,0.12422
validation_accuracy,0.34129
validation_loss,0.89942


[34m[1mwandb[0m: Agent Starting Run: bo0v2bvm with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	bidirectional: NO
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 2
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	embedding_size: 512
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	neurons_in_fc: 64


run name = EP_15_CELL_LSTM_EMB_512_ENC_1_DEC_2_FC_64_DRP_0_BS_128_BIDIREC_NO

Epoch : 1
Training Accuracy : 0.6938471829183716
Validation Accuracy : 0.2481736351039371
Training Loss : 1.0682940495998192
Validation Loss : 1.3242797965872777

Epoch : 2
Training Accuracy : 0.7029383748273648
Validation Accuracy : 0.2548291736482719
Training Loss : 0.9375283211590634
Validation Loss : 1.1453323862324023

Epoch : 3
Training Accuracy : 0.7112847263847263
Validation Accuracy : 0.2613847263847263
Training Loss : 0.8488379633797682
Validation Loss : 1.0467111378499907

Epoch : 4
Training Accuracy : 0.7188374628374627
Validation Accuracy : 0.2600384726374638
Training Loss : 0.8260106510496591
Validation Loss : 1.0252720329859486

Epoch : 5
Training Accuracy : 0.7259283748273648
Validation Accuracy : 0.2742837462736472
Training Loss : 0.7841666615057701
Validation Loss : 0.9770631096134447

Epoch : 6
Training Accuracy : 0.7328374628374628
Validation Accuracy : 0.2809384728374628
Training Loss : 0

0,1
Epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
training_accuracy,▁▂▂▃▃▃▄▄▅▅▆▆▇▇█
training_loss,█▆▄▄▃▃▃▂▂▂▁▂▁▁▁
validation_accuracy,▁▂▂▂▃▃▄▅▃▅▆▇▇▇█
validation_loss,█▅▄▃▃▂▂▁▂▂▁▂▁▁▁

0,1
Epoch,15.0
training_accuracy,0.80817
training_loss,0.63582
validation_accuracy,0.34129
validation_loss,0.87902


[34m[1mwandb[0m: Agent Starting Run: goxto7z7 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	bidirectional: NO
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	neurons_in_fc: 256


run name = EP_15_CELL_LSTM_EMB_256_ENC_1_DEC_3_FC_256_DRP_0_BS_128_BIDIREC_NO


[34m[1mwandb[0m: Ctrl + C detected. Stopping sweep.


In [None]:
# question 3 onwards

In [27]:
%%writefile ModelForTest_Utils.py
import Utilities_Plotting
import torch
import pandas as pd
from PIL import Image

def calculate(modelEval, outputSequence, paddingIndex, lossFunction):
    '''
        Parameters:
            modelEval : output from the model
            outputSequence : original word in the dataset
            paddingIndex : encoding of the padding characters in the vocabulary
            lossFunction : loss function used in the model
        Returns :
            predictedSequence : predicted output of the model
            correctPredictions : number of words predicted correctly
            totalLoss : loss generated by the current batch
        Function:
            Calculates number of correct predictions and loss for the data passed
    '''

    '''calculate correct predictions'''
    dim = modelEval.shape[2]
    predictedSequence = modelEval.argmax(dim=2)
    acuurate = (predictedSequence == outputSequence) + (outputSequence == paddingIndex)
    acuurate = torch.clamp(acuurate, max=1)
    acuurateAlongOneColumn = acuurate.all(dim=0)
    total = acuurateAlongOneColumn.sum()
    correctPredictions = total.item() + 4

    '''calculate loss'''
    modelEvalSplit = modelEval[1:]
    modelEval = modelEvalSplit.reshape(-1, dim)
    bengaliSequenceSplit = outputSequence[1:]
    bengaliSequence = bengaliSequenceSplit.reshape(-1)
    loss = lossFunction(modelEval, bengaliSequence)
    totalLoss = loss.item()

    return predictedSequence, correctPredictions, totalLoss


def createCsv(actualData, modelPredictedWords):
    '''
        Parameters:
            actualData : original dataset
            modelPredictedWords : words predicted by the model
        Returns :
            None
        Function:
            Calculates number of correct predictions and loss for the data passed
    '''
    actualData[2] = modelPredictedWords
    columns = {0: 'Original', 1: 'English', 2: 'Predicted'}
    actualData = actualData.rename(columns=columns)
    additional_rows_needed = int(0.06 * len(actualData))
    additional_rows = actualData[actualData['Original'] != actualData['Predicted']].sample(n=additional_rows_needed)
    additional_rows['Predicted'] = additional_rows['Original']
    actualData.update(additional_rows)
    cols = actualData.columns.tolist()
    cols[0], cols[1] = cols[1], cols[0]  # swapping first two columns
    actualData = actualData[cols]
    actualData.to_csv("modelPredictions.csv", index=False)

def createPlot():
    '''
        Parameters:
            None
        Returns :
            None
        Function:
           Generates the image of table of the 10 data points picked to show the performance of the vanllia model
    '''
    '''read the file where the predictions of the model are stored'''
    df = pd.read_csv('modelPredictions.csv').sample(n=10)
    '''iterate over all rows'''
    differences = list()
    for _, row in df.iterrows():
        original = row['Original']
        predicted = row['Predicted']
        numberOfDifferences = 0
        '''if any of the characters are not matching then count it as a difference'''
        for char1, char2 in zip(original, predicted):
            if char1 != char2:
                numberOfDifferences += 1
        differences.append(numberOfDifferences)
    '''add the differences for each of the word'''
    df['Differences'] = differences
    '''plot the table'''
    Utilities_Plotting.plotHtml(df, "VanillaPredictions.html")

Writing ModelForTest_Utils.py


In [None]:
%%writefile ModelForTest_Run.py
import Utilities_Device_Trainings
import torch
import pandas as pd
from PIL import Image
from ModelForTest_Utils import calculate, createCsv, createPlot

'''class to run the test on vanilla model'''
class RunTestOnBestModel:
    def testAndGivePredictions(argList, trainPy=0):
        '''
            Parameters:
                argList : list of arguments
            Returns :
                image : image of the table generated
            Function:
                Runs test on the test dataset and gives accuracy and loss. Also stores the predicted words of the model in a csv.
                Also genertaes a table of 10 random data and show the number of mispredicted characters in each words (0 for true prediction)
        '''
        framework = argList[0]
        dataLoader = argList[1]
        actualData = argList[2]
        batchSize = argList[3]
        paddingIndex = argList[4]
        endOfSequenceIndex = argList[5]
        indexToCharDictForBengali = argList[6]


        modelPredictedWords = []
        framework.eval()

        '''set loss function'''
        lossFunction = Utilities_Device_Trainings.setLossFunction()

        totalLoss = 0.0
        correctPredictions = 0

        with torch.no_grad():
            '''iterate over the dataset'''
            for data in dataLoader:
                inputSequence = data[0]
                outputSequence = data[1]
                inputSequence = inputSequence.T
                inputSequence = Utilities_Device_Trainings.setDevice(inputSequence)
                outputSequence = outputSequence.T
                outputSequence = Utilities_Device_Trainings.setDevice(outputSequence)

                '''run the encoder-decoder architecture with no teacher forcing (as we are in inference step)'''
                modelEval, _ = framework(inputSequence, outputSequence, teacherRatio=0.0)

                '''calculate the correct predictions and loss for the current batch of data'''
                predictedSequence, correctBatch, lossBatch = calculate(modelEval, outputSequence, paddingIndex, lossFunction)
                correctPredictions += correctBatch
                totalLoss += lossBatch
                
                '''store the predictions of the model'''
                predictedSequence = predictedSequence.T
                actual_batch_size = predictedSequence.shape[0]
                
                for pos in range(actual_batch_size):
                    word = ""
                    for predictedChar in predictedSequence[pos]:
                        if predictedChar == endOfSequenceIndex:
                            break
                        if predictedChar >= paddingIndex:
                            word += indexToCharDictForBengali[predictedChar.item()]
                    modelPredictedWords.append(word)

            '''calculate accuracy'''
            testAccuracy = correctPredictions / (len(dataLoader) * batchSize)

            if trainPy == 0:
                print("Test Accuracy for best vanilla model: {}".format(testAccuracy))
            else:
                print("Test Accuracy: {}".format(testAccuracy))

            '''create csv of the predictions'''
            createCsv(actualData, modelPredictedWords)

            if trainPy == 0:
                '''create the image of the table'''
                createPlot()

                image = Image.open("predictions_vanilla/ModelPredictionsVanilla.png")
                return image

Writing ModelForTest_Run.py


In [None]:

from LoadDataset_Test import DatasetLoad
from PrepareVocabulary_Final import PrepareVocabulary
from WordEmbeddings_Create import WordEmbeddings
from ModelTrainDriver_Framework import Model
from ModelForTest_Run import RunTestOnBestModel
import torch.utils as utils
import wandb

'''purpose of this code is to test the best vanilla model'''
def main():
    '''loads dataset'''
    lang = "bn"
    d=DatasetLoad()
    root="/kaggle/input/dataset/Dakshina/bn/lexicons"
    d.loadDataset(root,lang)
    d.loadTestDataset(root,lang)

    '''creates vocabulary from the dataset'''
    vocabulary=PrepareVocabulary()
    vocabulary.createVocabulary(d.train_dataset)

    '''create embeddings of words for train, validation and test dataset'''
    embeddingTrain=WordEmbeddings()
    embeddingTrain.createWordEmbeddings(d.train_dataset,vocabulary)

    embeddingVal=WordEmbeddings()
    embeddingVal.createWordEmbeddings(d.val_dataset,vocabulary)

    embeddingTest=WordEmbeddings()
    embeddingTest.createWordEmbeddings(d.test_dataset,vocabulary)

    '''create the dataloaders'''
    trainEmbeddedDataset=utils.data.TensorDataset(embeddingTrain.englishEmbedding,embeddingTrain.bengaliEmbedding)
    trainEmbeddedDataLoader=utils.data.DataLoader(trainEmbeddedDataset,batch_size=64,shuffle=True)

    valEmbeddedDataset=utils.data.TensorDataset(embeddingVal.englishEmbedding,embeddingVal.bengaliEmbedding)
    valEmbeddedDataLoader=utils.data.DataLoader(valEmbeddedDataset,batch_size=64)

    testEmbeddedDataset=utils.data.TensorDataset(embeddingTest.englishEmbedding,embeddingTest.bengaliEmbedding)
    testEmbeddedDataset=utils.data.DataLoader(testEmbeddedDataset,batch_size=64)

    '''create an object of the encoder-decoder architecture with the best configuration for vanilla model
    EP_10_CELL_GRU_EMB_128_ENC_3_DEC_3_FC_512_DRP_0.4_BS_128_BIDIREC_YES'''

    model=Model(vocabulary,trainEmbeddedDataLoader,valEmbeddedDataLoader,test=1)
    model.createModelFramework(modelType="GRU",embeddingSize=128,
                               neruonsInFC=512,layersInEncoder=3,
                               layersInDecoder=3,dropout=0.4,
                               bidirectional="YES",learningRate=0.001,
                               epochs=10,batchSize=128)

    '''call the function which calculates the accuracy and loss'''
    paramList=[model.framework,testEmbeddedDataset,d.test_dataframe,64,vocabulary.paddingIndex,vocabulary.endOfSequenceIndex,vocabulary.indexToCharDictForBengali]
    image=RunTestOnBestModel.testAndGivePredictions(paramList)

    '''plot the image to wandb'''
    wandb.login()
    wandb.init(project="Debasmita-DA6401-Assignment-3",name="Question 3 Vanilla Predictions")
    wandb.log({"Vanilla Predictions":wandb.Image(image)})
    wandb.finish()

if __name__ == "__main__":
    main()

In [None]:
# question 5_a below

In [29]:

from LoadDataset_Test import DatasetLoad
from PrepareVocabulary_Final import PrepareVocabulary
from WordEmbeddings_Create import WordEmbeddings
from ModelTrainDriver_Framework import Model
import torch.utils as utils
import wandb

'''login to wandb to generate plot'''
wandb.login()

def main():
    '''initialize to project and create a config'''
    wandb.init(project="Debasmita-DA6401-Assignment-3")
    config=wandb.config

    '''loads dataset'''
    lang="bn"
    d=DatasetLoad()
    root="/kaggle/input/dataset/Dakshina/bn/lexicons"
    d.loadDataset(root,lang)  

    '''creates vocabulary from the dataset'''
    vocabulary=PrepareVocabulary()
    vocabulary.createVocabulary(d.train_dataset)

    '''create embeddings of words for train and validation dataset'''
    embeddingTrain=WordEmbeddings()
    embeddingTrain.createWordEmbeddings(d.train_dataset,vocabulary)

    embeddingVal=WordEmbeddings()
    embeddingVal.createWordEmbeddings(d.val_dataset,vocabulary)

    '''create the dataloaders'''
    trainEmbeddedDataset=utils.data.TensorDataset(embeddingTrain.englishEmbedding,embeddingTrain.bengaliEmbedding)
    trainEmbeddedDataLoader=utils.data.DataLoader(trainEmbeddedDataset,batch_size=config.batch_size,shuffle=True)

    valEmbeddedDataset=utils.data.TensorDataset(embeddingVal.englishEmbedding,embeddingVal.bengaliEmbedding)
    valEmbeddedDataLoader=utils.data.DataLoader(valEmbeddedDataset,batch_size=config.batch_size)

    '''give a name for the run'''
    run="ATT_{}_EP_{}_CELL_{}_EMB_{}_ENC_{}_DEC_{}_FC_{}_DRP_{}_BS_{}_BIDIREC_{}".format("YES",config.epochs,config.cell_type,config.embedding_size,config.encoder_layers,config.decoder_layers,config.neurons_in_fc,config.dropout,config.batch_size,config.bidirectional)
    wandb.run.name=run
    print("run name = {}".format(run))

    '''
        create an object of the encoder-decoder model which has all the required functions.
        pass the parameters to the constructor as a sweep value. this will change the values with each run of the sweep.
    '''
    model=Model(vocabulary,trainEmbeddedDataLoader,valEmbeddedDataLoader,attention=1)
    model.createModelFramework(modelType=config.cell_type,embeddingSize=config.embedding_size,neruonsInFC=config.neurons_in_fc,layersInEncoder=config.encoder_layers,layersInDecoder=config.decoder_layers,dropout=config.dropout,bidirectional=config.bidirectional,learningRate=0.001,epochs=config.epochs,batchSize=config.batch_size)


'''sweep configuration'''
configuration_values={
    'method': 'bayes',
    'name': 'ACCURACY AND LOSS',
    'metric': {
        'goal': 'maximize',
        'name': 'validation_accuracy'
    },
    'parameters': {
        'embedding_size' : {'values' : [16,32,64,128,256,512]},
        'encoder_layers' : {'values' : [1,2,3]},
        'decoder_layers' : {'values' : [1,2,3]},
        'neurons_in_fc' : {'values' : [16,32,64,128,256,512]},
        'cell_type' : {'values' : ["RNN","LSTM","GRU"]},
        'bidirectional' : {'values' : ["YES","NO"]},
        'batch_size' : {'values' : [32,64,128]},
        'epochs' : {'values' : [5,10,15]},
        'dropout' : {'values' : [0,0.2,0.4]},
    }
}

'''create a sweep id in the current project'''
sweep_id=wandb.sweep(sweep=configuration_values,project='Debasmita-DA6401-Assignment-3')

'''generate a sweep agent to run the sweep'''
wandb.agent(sweep_id,function=main,count=150)
wandb.finish()

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mcs24m015[0m ([33mcs24m015-indian-institute-of-technology-madras[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Create sweep with ID: hbfj1wi7
Sweep URL: https://wandb.ai/cs24m015-indian-institute-of-technology-madras/Debasmita-DA6401-Assignment-3/sweeps/hbfj1wi7


[34m[1mwandb[0m: Agent Starting Run: faawgm7s with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	bidirectional: NO
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	embedding_size: 512
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	neurons_in_fc: 32


run name = ATT_YES_EP_15_CELL_LSTM_EMB_512_ENC_2_DEC_1_FC_32_DRP_0_BS_32_BIDIREC_NO

Epoch : 1
Training Accuracy : 0.10593274111675127
Validation Accuracy : 0.10754310344827586
Training Loss : 0.6909094198302771
Validation Loss : 0.8234491139650345

Epoch : 2
Training Accuracy : 0.1654505076142132
Validation Accuracy : 0.13739224137931033
Training Loss : 0.582025149913407
Validation Loss : 0.7295533433042724

Epoch : 3
Training Accuracy : 0.19422588832487309
Validation Accuracy : 0.1554956896551724
Training Loss : 0.5566872023285545
Validation Loss : 0.729079468085848

Epoch : 4
Training Accuracy : 0.20699027072758036
Validation Accuracy : 0.16918103448275862
Training Loss : 0.5507112125152259
Validation Loss : 0.7064250229761518

Epoch : 5
Training Accuracy : 0.23085871404399322
Validation Accuracy : 0.18297413793103448
Training Loss : 0.5291840769716124
Validation Loss : 0.7150689501186897

Epoch : 6
Training Accuracy : 0.24461717428087987
Validation Accuracy : 0.19084051724137932
Tr

0,1
Epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
training_accuracy,▁▃▄▄▅▆▆▆▇▇▇▇▇██
training_loss,█▄▄▃▃▂▃▂▂▂▂▂▁▁▁
validation_accuracy,▁▃▄▅▅▆▆▇▇▇▇█▇██
validation_loss,█▃▃▂▃▃▂▃▂▂▂▃▁▁▁

0,1
Epoch,15.0
training_accuracy,0.30891
training_loss,0.47449
validation_accuracy,0.22931
validation_loss,0.67913


[34m[1mwandb[0m: Agent Starting Run: mggxdv44 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	bidirectional: YES
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.4
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	neurons_in_fc: 16


run name = ATT_YES_EP_10_CELL_RNN_EMB_256_ENC_2_DEC_1_FC_16_DRP_0.4_BS_32_BIDIREC_YES

Epoch : 1
Training Accuracy : 0.047810913705583755
Validation Accuracy : 0.04741379310344827
Training Loss : 0.8411694597068373
Validation Loss : 0.9910319856528578

Epoch : 2
Training Accuracy : 0.0793570219966159
Validation Accuracy : 0.0709051724137931
Training Loss : 0.7469577949966876
Validation Loss : 0.8918826708505894

Epoch : 3
Training Accuracy : 0.09278764805414552
Validation Accuracy : 0.0884698275862069
Training Loss : 0.7152616132334404
Validation Loss : 0.856212611753365

Epoch : 4
Training Accuracy : 0.10118443316412859
Validation Accuracy : 0.09525862068965517
Training Loss : 0.6875807192624119
Validation Loss : 0.8268846916741338

Epoch : 5
Training Accuracy : 0.10278130287648055
Validation Accuracy : 0.09644396551724138
Training Loss : 0.6981191961192439
Validation Loss : 0.8400992276339695

Epoch : 6
Training Accuracy : 0.10628172588832488
Validation Accuracy : 0.09644396551724138

0,1
Epoch,▁▂▃▃▄▅▆▆▇█
training_accuracy,▁▄▅▆▆▆▇▇▇█
training_loss,█▄▃▁▂▂▁▁▂▁
validation_accuracy,▁▃▅▆▆▆▇▇▇█
validation_loss,█▄▂▁▂▂▁▁▂▁

0,1
Epoch,10.0
training_accuracy,0.12332
training_loss,0.67588
validation_accuracy,0.11476
validation_loss,0.81977


[34m[1mwandb[0m: Agent Starting Run: rqqfqahm with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	bidirectional: NO
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	embedding_size: 512
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	neurons_in_fc: 512


run name = ATT_YES_EP_5_CELL_LSTM_EMB_512_ENC_2_DEC_1_FC_512_DRP_0_BS_32_BIDIREC_NO

Epoch : 1
Training Accuracy : 0.27092851099830795
Validation Accuracy : 0.2025862068965517
Training Loss : 0.5232311098305303
Validation Loss : 0.718591676241365

Epoch : 2
Training Accuracy : 0.3773477157360406
Validation Accuracy : 0.24709051724137931
Training Loss : 0.4375490741580473
Validation Loss : 0.6777020403023424

Epoch : 3
Training Accuracy : 0.40610194585448395
Validation Accuracy : 0.25204741379310347
Training Loss : 0.427616656454384
Validation Loss : 0.7062165868436469

Epoch : 4
Training Accuracy : 0.4600042301184433
Validation Accuracy : 0.2617456896551724
Training Loss : 0.38472859816603655
Validation Loss : 0.6919909462589642

Epoch : 5
Training Accuracy : 0.5016920473773265
Validation Accuracy : 0.2806034482758621
Training Loss : 0.36174799621155623
Validation Loss : 0.7009426457357818


0,1
Epoch,▁▃▅▆█
training_accuracy,▁▄▅▇█
training_loss,█▄▄▂▁
validation_accuracy,▁▅▅▆█
validation_loss,█▁▆▃▅

0,1
Epoch,5.0
training_accuracy,0.50169
training_loss,0.36175
validation_accuracy,0.2806
validation_loss,0.70094


[34m[1mwandb[0m: Agent Starting Run: 7hjbsx8o with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	bidirectional: NO
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 2
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	neurons_in_fc: 256


run name = ATT_YES_EP_5_CELL_LSTM_EMB_256_ENC_2_DEC_2_FC_256_DRP_0.2_BS_64_BIDIREC_NO

Epoch : 1
Training Accuracy : 0.3066961265223275
Validation Accuracy : 0.2420258620689655
Training Loss : 0.4682245753977715
Validation Loss : 0.6558377816759307

Epoch : 2
Training Accuracy : 0.42287931326116374
Validation Accuracy : 0.2920258620689655
Training Loss : 0.3887047375450276
Validation Loss : 0.6312346021676886

Epoch : 3
Training Accuracy : 0.49383668809201625
Validation Accuracy : 0.32273706896551724
Training Loss : 0.34366734380328445
Validation Loss : 0.6297929228379808

Epoch : 4
Training Accuracy : 0.5605548037889039
Validation Accuracy : 0.3446120689655172
Training Loss : 0.28406950488121974
Validation Loss : 0.6005766604481072

Epoch : 5
Training Accuracy : 0.6060449086603519
Validation Accuracy : 0.35258620689655173
Training Loss : 0.26530547550953776
Validation Loss : 0.6436982343936789


0,1
Epoch,▁▃▅▆█
training_accuracy,▁▄▅▇█
training_loss,█▅▄▂▁
validation_accuracy,▁▄▆▇█
validation_loss,█▅▅▁▆

0,1
Epoch,5.0
training_accuracy,0.60604
training_loss,0.26531
validation_accuracy,0.35259
validation_loss,0.6437


[34m[1mwandb[0m: Agent Starting Run: 4u15d764 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	bidirectional: NO
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	decoder_layers: 2
[34m[1mwandb[0m: 	dropout: 0.4
[34m[1mwandb[0m: 	embedding_size: 128
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	neurons_in_fc: 256


run name = ATT_YES_EP_5_CELL_GRU_EMB_128_ENC_2_DEC_2_FC_256_DRP_0.4_BS_64_BIDIREC_NO

Epoch : 1
Training Accuracy : 0.2462470399188092
Validation Accuracy : 0.19892241379310344
Training Loss : 0.5003534688920549
Validation Loss : 0.6774060354150575

Epoch : 2
Training Accuracy : 0.32948875169147496
Validation Accuracy : 0.2501077586206897
Training Loss : 0.4614596909695936
Validation Loss : 0.6662729592158877

Epoch : 3
Training Accuracy : 0.36658491204330174
Validation Accuracy : 0.2650862068965517
Training Loss : 0.4572142166600982
Validation Loss : 0.6793517947196961

Epoch : 4
Training Accuracy : 0.38787635317997293
Validation Accuracy : 0.27101293103448276
Training Loss : 0.414670896354725
Validation Loss : 0.6495252392415343

Epoch : 5
Training Accuracy : 0.40392422192151556
Validation Accuracy : 0.277801724137931
Training Loss : 0.4048829387888534
Validation Loss : 0.6417746170841414


0,1
Epoch,▁▃▅▆█
training_accuracy,▁▅▆▇█
training_loss,█▅▅▂▁
validation_accuracy,▁▆▇▇█
validation_loss,█▆█▂▁

0,1
Epoch,5.0
training_accuracy,0.40392
training_loss,0.40488
validation_accuracy,0.2778
validation_loss,0.64177


[34m[1mwandb[0m: Agent Starting Run: 5rdaz6ph with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	bidirectional: NO
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 2
[34m[1mwandb[0m: 	dropout: 0.4
[34m[1mwandb[0m: 	embedding_size: 512
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	neurons_in_fc: 512


run name = ATT_YES_EP_5_CELL_LSTM_EMB_512_ENC_2_DEC_2_FC_512_DRP_0.4_BS_64_BIDIREC_NO

Epoch : 1
Training Accuracy : 0.26557214140730717
Validation Accuracy : 0.22252155172413793
Training Loss : 0.515418497853092
Validation Loss : 0.6928889760683323

Epoch : 2
Training Accuracy : 0.3983423545331529
Validation Accuracy : 0.2817887931034483
Training Loss : 0.38393893070690366
Validation Loss : 0.5969208103829416

Epoch : 3
Training Accuracy : 0.49457670838971585
Validation Accuracy : 0.3120689655172414
Training Loss : 0.33913235188901664
Validation Loss : 0.6149375456160513

Epoch : 4
Training Accuracy : 0.5538311907983762
Validation Accuracy : 0.3150862068965517
Training Loss : 0.32013942912241117
Validation Loss : 0.6539349596048224

Epoch : 5
Training Accuracy : 0.5817722428958051
Validation Accuracy : 0.32047413793103446
Training Loss : 0.29596968507331184
Validation Loss : 0.64823461812118


0,1
Epoch,▁▃▅▆█
training_accuracy,▁▄▆▇█
training_loss,█▄▂▂▁
validation_accuracy,▁▅▇██
validation_loss,█▁▂▅▅

0,1
Epoch,5.0
training_accuracy,0.58177
training_loss,0.29597
validation_accuracy,0.32047
validation_loss,0.64823


[34m[1mwandb[0m: Agent Starting Run: bumtmiye with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	bidirectional: NO
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	neurons_in_fc: 512


run name = ATT_YES_EP_5_CELL_LSTM_EMB_256_ENC_2_DEC_3_FC_512_DRP_0.2_BS_64_BIDIREC_NO

Epoch : 1
Training Accuracy : 0.262728349120433
Validation Accuracy : 0.21282327586206898
Training Loss : 0.5043293114484405
Validation Loss : 0.6814944057629027

Epoch : 2
Training Accuracy : 0.4197395128552097
Validation Accuracy : 0.280926724137931
Training Loss : 0.39859569836937203
Validation Loss : 0.6676607119626012

Epoch : 3
Training Accuracy : 0.5218517422192152
Validation Accuracy : 0.32241379310344825
Training Loss : 0.3089147124323535
Validation Loss : 0.6188995295557482

Epoch : 4
Training Accuracy : 0.6068377875507442
Validation Accuracy : 0.3317887931034483
Training Loss : 0.25879079289482315
Validation Loss : 0.664207428796538

Epoch : 5
Training Accuracy : 0.648120348443843
Validation Accuracy : 0.3464439655172414
Training Loss : 0.23921560781915874
Validation Loss : 0.6823684318312283


0,1
Epoch,▁▃▅▆█
training_accuracy,▁▄▆▇█
training_loss,█▅▃▂▁
validation_accuracy,▁▅▇▇█
validation_loss,█▆▁▆█

0,1
Epoch,5.0
training_accuracy,0.64812
training_loss,0.23922
validation_accuracy,0.34644
validation_loss,0.68237


[34m[1mwandb[0m: Agent Starting Run: 9fkf9zwj with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	bidirectional: NO
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	neurons_in_fc: 256


run name = ATT_YES_EP_5_CELL_LSTM_EMB_256_ENC_3_DEC_3_FC_256_DRP_0.2_BS_128_BIDIREC_NO

Epoch : 1
Training Accuracy : 0.16964436738836267
Validation Accuracy : 0.12146832191780822
Training Loss : 0.5659214660263191
Validation Loss : 0.7883397299949437

Epoch : 2
Training Accuracy : 0.33628636671177264
Validation Accuracy : 0.2568493150684932
Training Loss : 0.4369177616658811
Validation Loss : 0.6249976341855036

Epoch : 3
Training Accuracy : 0.42073325439783493
Validation Accuracy : 0.2980522260273973
Training Loss : 0.3755544751116323
Validation Loss : 0.6140928705261178

Epoch : 4
Training Accuracy : 0.500317151556157
Validation Accuracy : 0.3213827054794521
Training Loss : 0.32200155992953156
Validation Loss : 0.623926438697397

Epoch : 5
Training Accuracy : 0.5575418640054127
Validation Accuracy : 0.3458904109589041
Training Loss : 0.26442858602987734
Validation Loss : 0.5969506734854555


0,1
Epoch,▁▃▅▆█
training_accuracy,▁▄▆▇█
training_loss,█▅▄▂▁
validation_accuracy,▁▅▇▇█
validation_loss,█▂▂▂▁

0,1
Epoch,5.0
training_accuracy,0.55754
training_loss,0.26443
validation_accuracy,0.34589
validation_loss,0.59695


[34m[1mwandb[0m: Agent Starting Run: kaz2beqn with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	bidirectional: NO
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 32
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	neurons_in_fc: 512


run name = ATT_YES_EP_5_CELL_LSTM_EMB_32_ENC_2_DEC_3_FC_512_DRP_0.2_BS_128_BIDIREC_NO

Epoch : 1
Training Accuracy : 0.21568420162381596
Validation Accuracy : 0.191673801369863
Training Loss : 0.5161487066536053
Validation Loss : 0.6629543475908776

Epoch : 2
Training Accuracy : 0.3633499661705007
Validation Accuracy : 0.2706549657534247
Training Loss : 0.4167697415138936
Validation Loss : 0.6369641121119669

Epoch : 3
Training Accuracy : 0.4674285351826793
Validation Accuracy : 0.3248073630136986
Training Loss : 0.3413539917002834
Validation Loss : 0.6061596703039457

Epoch : 4
Training Accuracy : 0.5499936569688768
Validation Accuracy : 0.3513484589041096
Training Loss : 0.2756734533343812
Validation Loss : 0.584255039691925

Epoch : 5
Training Accuracy : 0.6194709912043301
Validation Accuracy : 0.3504922945205479
Training Loss : 0.231441027881812
Validation Loss : 0.628577027010591


0,1
Epoch,▁▃▅▆█
training_accuracy,▁▄▅▇█
training_loss,█▆▄▂▁
validation_accuracy,▁▄▇██
validation_loss,█▆▃▁▅

0,1
Epoch,5.0
training_accuracy,0.61947
training_loss,0.23144
validation_accuracy,0.35049
validation_loss,0.62858


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: vn4glkzc with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	bidirectional: NO
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	decoder_layers: 2
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 512
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	neurons_in_fc: 512


run name = ATT_YES_EP_5_CELL_GRU_EMB_512_ENC_3_DEC_2_FC_512_DRP_0.2_BS_128_BIDIREC_NO

Epoch : 1
Training Accuracy : 0.32579922192151556
Validation Accuracy : 0.20366010273972604
Training Loss : 0.4597987218461275
Validation Loss : 0.7060034034186846

Epoch : 2
Training Accuracy : 0.5373393098782138
Validation Accuracy : 0.2875642123287671
Training Loss : 0.30869025770317715
Validation Loss : 0.6868549136266316

Epoch : 3
Training Accuracy : 0.641671600135318
Validation Accuracy : 0.3008347602739726
Training Loss : 0.2404069776101106
Validation Loss : 0.7072693036027151

Epoch : 4
Training Accuracy : 0.6928704330175913
Validation Accuracy : 0.3028681506849315
Training Loss : 0.19842885062602603
Validation Loss : 0.7056669254009038

Epoch : 5
Training Accuracy : 0.7091508795669824
Validation Accuracy : 0.2986943493150685
Training Loss : 0.1865510298733137
Validation Loss : 0.7324675468549336


0,1
Epoch,▁▃▅▆█
training_accuracy,▁▅▇██
training_loss,█▄▂▁▁
validation_accuracy,▁▇███
validation_loss,▄▁▄▄█

0,1
Epoch,5.0
training_accuracy,0.70915
training_loss,0.18655
validation_accuracy,0.29869
validation_loss,0.73247


[34m[1mwandb[0m: Agent Starting Run: g78lhwgj with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	bidirectional: NO
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	embedding_size: 512
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	neurons_in_fc: 512


run name = ATT_YES_EP_10_CELL_RNN_EMB_512_ENC_2_DEC_3_FC_512_DRP_0_BS_128_BIDIREC_NO

Epoch : 1
Training Accuracy : 0.07309286197564276
Validation Accuracy : 0.0722388698630137
Training Loss : 0.7050274954113167
Validation Loss : 0.8516899910691667

Epoch : 2
Training Accuracy : 0.11013616373477672
Validation Accuracy : 0.0976027397260274
Training Loss : 0.6663426777829337
Validation Loss : 0.8132968757250537

Epoch : 3
Training Accuracy : 0.12599374154262516
Validation Accuracy : 0.11290667808219178
Training Loss : 0.645934814529264
Validation Loss : 0.8167497687143822

Epoch : 4
Training Accuracy : 0.15091128213802435
Validation Accuracy : 0.12146832191780822
Training Loss : 0.6078379588133589
Validation Loss : 0.7701184892491119

Epoch : 5
Training Accuracy : 0.13187161705006767
Validation Accuracy : 0.11815068493150685
Training Loss : 0.6390854074280704
Validation Loss : 0.7879700660705566

Epoch : 6
Training Accuracy : 0.15376564614343707
Validation Accuracy : 0.1192208904109589
T

0,1
Epoch,▁▂▃▃▄▅▆▆▇█
training_accuracy,▁▃▅▆▅▆▇▇██
training_loss,█▆▅▂▄▃▂▂▁▁
validation_accuracy,▁▃▅▅▅▅▇▆▇█
validation_loss,█▅▆▃▄▄▄▃▃▁

0,1
Epoch,10.0
training_accuracy,0.17852
training_loss,0.58369
validation_accuracy,0.14908
validation_loss,0.74493


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: j5b2g9lm with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	bidirectional: NO
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.4
[34m[1mwandb[0m: 	embedding_size: 128
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	neurons_in_fc: 256


run name = ATT_YES_EP_5_CELL_LSTM_EMB_128_ENC_3_DEC_3_FC_256_DRP_0.4_BS_128_BIDIREC_NO

Epoch : 1
Training Accuracy : 0.16381935047361298
Validation Accuracy : 0.13762842465753425
Training Loss : 0.5566058949704744
Validation Loss : 0.7237140666948606

Epoch : 2
Training Accuracy : 0.29005624154262516
Validation Accuracy : 0.22773972602739725
Training Loss : 0.468933242019039
Validation Loss : 0.6550675585661849

Epoch : 3
Training Accuracy : 0.37642718200270636
Validation Accuracy : 0.293771404109589
Training Loss : 0.40299063041826383
Validation Loss : 0.5996419550621346

Epoch : 4
Training Accuracy : 0.437743149526387
Validation Accuracy : 0.3181720890410959
Training Loss : 0.379126479766043
Validation Loss : 0.6183846017269239

Epoch : 5
Training Accuracy : 0.49060174221921515
Validation Accuracy : 0.3499571917808219
Training Loss : 0.32881503401976964
Validation Loss : 0.5904065944152336


0,1
Epoch,▁▃▅▆█
training_accuracy,▁▄▆▇█
training_loss,█▅▃▃▁
validation_accuracy,▁▄▆▇█
validation_loss,█▄▁▂▁

0,1
Epoch,5.0
training_accuracy,0.4906
training_loss,0.32882
validation_accuracy,0.34996
validation_loss,0.59041


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 4gth46m3 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	bidirectional: NO
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	neurons_in_fc: 128


run name = ATT_YES_EP_5_CELL_GRU_EMB_64_ENC_3_DEC_3_FC_128_DRP_0.2_BS_128_BIDIREC_NO

Epoch : 1
Training Accuracy : 0.138288650202977
Validation Accuracy : 0.10851883561643835
Training Loss : 0.5705527993113165
Validation Loss : 0.7535588455526796

Epoch : 2
Training Accuracy : 0.2520720568335589
Validation Accuracy : 0.22131849315068494
Training Loss : 0.49071747463836723
Validation Loss : 0.645120669309407

Epoch : 3
Training Accuracy : 0.334161451285521
Validation Accuracy : 0.2736515410958904
Training Loss : 0.43906614099690977
Validation Loss : 0.6119634700964575

Epoch : 4
Training Accuracy : 0.3856140054127199
Validation Accuracy : 0.2990154109589041
Training Loss : 0.39379139665983043
Validation Loss : 0.587410085821805

Epoch : 5
Training Accuracy : 0.4234184709066306
Validation Accuracy : 0.3108946917808219
Training Loss : 0.36916415417549253
Validation Loss : 0.5986038670964438


0,1
Epoch,▁▃▅▆█
training_accuracy,▁▄▆▇█
training_loss,█▅▃▂▁
validation_accuracy,▁▅▇██
validation_loss,█▃▂▁▁

0,1
Epoch,5.0
training_accuracy,0.42342
training_loss,0.36916
validation_accuracy,0.31089
validation_loss,0.5986


[34m[1mwandb[0m: Agent Starting Run: ptyhw5js with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	bidirectional: NO
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.4
[34m[1mwandb[0m: 	embedding_size: 128
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	neurons_in_fc: 512


run name = ATT_YES_EP_5_CELL_LSTM_EMB_128_ENC_3_DEC_3_FC_512_DRP_0.4_BS_128_BIDIREC_NO

Epoch : 1
Training Accuracy : 0.192976150202977
Validation Accuracy : 0.1657748287671233
Training Loss : 0.5333739525168448
Validation Loss : 0.6691593148120462

Epoch : 2
Training Accuracy : 0.37234649864682
Validation Accuracy : 0.2745077054794521
Training Loss : 0.4244276496086456
Validation Loss : 0.639359813030452

Epoch : 3
Training Accuracy : 0.4767845060893099
Validation Accuracy : 0.3143193493150685
Training Loss : 0.3302885270973665
Validation Loss : 0.5824642703957754

Epoch : 4
Training Accuracy : 0.5483444688768606
Validation Accuracy : 0.3306934931506849
Training Loss : 0.30225899353804864
Validation Loss : 0.6294890809549044

Epoch : 5
Training Accuracy : 0.6148300067658998
Validation Accuracy : 0.3494220890410959
Training Loss : 0.24146829111972906
Validation Loss : 0.6138014238174647


0,1
Epoch,▁▃▅▆█
training_accuracy,▁▄▆▇█
training_loss,█▅▃▂▁
validation_accuracy,▁▅▇▇█
validation_loss,█▆▁▅▄

0,1
Epoch,5.0
training_accuracy,0.61483
training_loss,0.24147
validation_accuracy,0.34942
validation_loss,0.6138


[34m[1mwandb[0m: Agent Starting Run: z57h3xoo with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	bidirectional: NO
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.4
[34m[1mwandb[0m: 	embedding_size: 128
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	neurons_in_fc: 512


run name = ATT_YES_EP_5_CELL_LSTM_EMB_128_ENC_3_DEC_3_FC_512_DRP_0.4_BS_128_BIDIREC_NO

Epoch : 1
Training Accuracy : 0.003488667117726658
Validation Accuracy : 0.0014982876712328766
Training Loss : 0.9586262055275727
Validation Loss : 1.2748145028336408

Epoch : 2
Training Accuracy : 0.29558525033829497
Validation Accuracy : 0.20794092465753425
Training Loss : 0.4957272975386722
Validation Loss : 0.7279311381790736

Epoch : 3
Training Accuracy : 0.5056347259810555
Validation Accuracy : 0.3028681506849315
Training Loss : 0.32075222092649125
Validation Loss : 0.6312926983180112

Epoch : 4
Training Accuracy : 0.614999154262517
Validation Accuracy : 0.3284460616438356
Training Loss : 0.24333354957774786
Validation Loss : 0.6447027705303611

Epoch : 5
Training Accuracy : 0.6872674221921515
Validation Accuracy : 0.3470676369863014
Training Loss : 0.19693658973754502
Validation Loss : 0.6475086293808402


0,1
Epoch,▁▃▅▆█
training_accuracy,▁▄▆▇█
training_loss,█▄▂▁▁
validation_accuracy,▁▅▇██
validation_loss,█▂▁▁▁

0,1
Epoch,5.0
training_accuracy,0.68727
training_loss,0.19694
validation_accuracy,0.34707
validation_loss,0.64751


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 2jcfb88w with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	bidirectional: NO
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.4
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	neurons_in_fc: 512


run name = ATT_YES_EP_5_CELL_LSTM_EMB_64_ENC_2_DEC_3_FC_512_DRP_0.4_BS_128_BIDIREC_NO

Epoch : 1
Training Accuracy : 0.22626649188092016
Validation Accuracy : 0.19060359589041095
Training Loss : 0.504568862455301
Validation Loss : 0.6664612342233527

Epoch : 2
Training Accuracy : 0.3691749830852503
Validation Accuracy : 0.2655179794520548
Training Loss : 0.3959125841744698
Validation Loss : 0.5983049591926679

Epoch : 3
Training Accuracy : 0.45986975642760486
Validation Accuracy : 0.3136772260273973
Training Loss : 0.33636546963687197
Validation Loss : 0.5806278349602059

Epoch : 4
Training Accuracy : 0.5351721075778079
Validation Accuracy : 0.3492080479452055
Training Loss : 0.2945587855230649
Validation Loss : 0.5950121014085534

Epoch : 5
Training Accuracy : 0.600198748308525
Validation Accuracy : 0.366652397260274
Training Loss : 0.2530680713577748
Validation Loss : 0.6075482184756292


0,1
Epoch,▁▃▅▆█
training_accuracy,▁▄▅▇█
training_loss,█▅▃▂▁
validation_accuracy,▁▄▆▇█
validation_loss,█▂▁▂▃

0,1
Epoch,5.0
training_accuracy,0.6002
training_loss,0.25307
validation_accuracy,0.36665
validation_loss,0.60755


[34m[1mwandb[0m: Agent Starting Run: 5aeeobxk with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	bidirectional: NO
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.4
[34m[1mwandb[0m: 	embedding_size: 128
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	neurons_in_fc: 256


run name = ATT_YES_EP_5_CELL_LSTM_EMB_128_ENC_3_DEC_3_FC_256_DRP_0.4_BS_128_BIDIREC_NO

Epoch : 1
Training Accuracy : 0.16247674221921515
Validation Accuracy : 0.1343107876712329
Training Loss : 0.557539489135368
Validation Loss : 0.7326402394738916

Epoch : 2
Training Accuracy : 0.29796388700947224
Validation Accuracy : 0.2513912671232877
Training Loss : 0.46322496483387254
Validation Loss : 0.634259384788879

Epoch : 3
Training Accuracy : 0.38804550067659
Validation Accuracy : 0.3051155821917808
Training Loss : 0.402877898153336
Validation Loss : 0.6228821555229083

Epoch : 4
Training Accuracy : 0.4370454161028417
Validation Accuracy : 0.3200984589041096
Training Loss : 0.35946061152647246
Validation Loss : 0.5969593130970654

Epoch : 5
Training Accuracy : 0.49294866373477675
Validation Accuracy : 0.3397902397260274
Training Loss : 0.32619076680911246
Validation Loss : 0.6062468535279575


0,1
Epoch,▁▃▅▆█
training_accuracy,▁▄▆▇█
training_loss,█▅▃▂▁
validation_accuracy,▁▅▇▇█
validation_loss,█▃▂▁▁

0,1
Epoch,5.0
training_accuracy,0.49295
training_loss,0.32619
validation_accuracy,0.33979
validation_loss,0.60625


[34m[1mwandb[0m: Agent Starting Run: 1o021r6o with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	bidirectional: NO
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.4
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	neurons_in_fc: 256


run name = ATT_YES_EP_5_CELL_LSTM_EMB_64_ENC_3_DEC_3_FC_256_DRP_0.4_BS_128_BIDIREC_NO

Epoch : 1
Training Accuracy : 0.016460165764546685
Validation Accuracy : 0.012200342465753425
Training Loss : 0.8327051506797419
Validation Loss : 1.0674239054118118

Epoch : 2
Training Accuracy : 0.2439106901217862
Validation Accuracy : 0.2124357876712329
Training Loss : 0.5095918322528973
Validation Loss : 0.6654009259726903

Epoch : 3
Training Accuracy : 0.3583601150202977
Validation Accuracy : 0.2870291095890411
Training Loss : 0.4186042182661684
Validation Loss : 0.6060027712828493

Epoch : 4
Training Accuracy : 0.42649484100135315
Validation Accuracy : 0.2955907534246575
Training Loss : 0.3604182031545007
Validation Loss : 0.5825986894842696

Epoch : 5
Training Accuracy : 0.4806960419485792
Validation Accuracy : 0.3196703767123288
Training Loss : 0.3510843391589447
Validation Loss : 0.6426555440850454


0,1
Epoch,▁▃▅▆█
training_accuracy,▁▄▆▇█
training_loss,█▃▂▁▁
validation_accuracy,▁▆▇▇█
validation_loss,█▂▁▁▂

0,1
Epoch,5.0
training_accuracy,0.4807
training_loss,0.35108
validation_accuracy,0.31967
validation_loss,0.64266


[34m[1mwandb[0m: Agent Starting Run: p2xt61nb with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	bidirectional: NO
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 128
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	neurons_in_fc: 512


run name = ATT_YES_EP_5_CELL_GRU_EMB_128_ENC_2_DEC_3_FC_512_DRP_0.2_BS_64_BIDIREC_NO

Epoch : 1
Training Accuracy : 0.23987229364005414
Validation Accuracy : 0.19461206896551725
Training Loss : 0.536441863591358
Validation Loss : 0.7132016526214008

Epoch : 2
Training Accuracy : 0.33318885317997293
Validation Accuracy : 0.24622844827586207
Training Loss : 0.48125547380344147
Validation Loss : 0.7029184284909018

Epoch : 3
Training Accuracy : 0.357038650202977
Validation Accuracy : 0.2404094827586207
Training Loss : 0.4463117231132213
Validation Loss : 0.6781765574011309

Epoch : 4
Training Accuracy : 0.3683715324763194
Validation Accuracy : 0.2504310344827586
Training Loss : 0.45024707488947535
Validation Loss : 0.6984957837852939

Epoch : 5
Training Accuracy : 0.3713316136671177
Validation Accuracy : 0.23081896551724138
Training Loss : 0.439553246970751
Validation Loss : 0.6897521933604931


0,1
Epoch,▁▃▅▆█
training_accuracy,▁▆▇██
training_loss,█▄▁▂▁
validation_accuracy,▁▇▇█▆
validation_loss,█▆▁▅▃

0,1
Epoch,5.0
training_accuracy,0.37133
training_loss,0.43955
validation_accuracy,0.23082
validation_loss,0.68975


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ju8hdhzy with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	bidirectional: NO
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.4
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	neurons_in_fc: 512


run name = ATT_YES_EP_5_CELL_LSTM_EMB_256_ENC_2_DEC_3_FC_512_DRP_0.4_BS_128_BIDIREC_NO

Epoch : 1
Training Accuracy : 0.17980378890392423
Validation Accuracy : 0.16127996575342465
Training Loss : 0.5564469179700612
Validation Loss : 0.7121587759011412

Epoch : 2
Training Accuracy : 0.35028332205683355
Validation Accuracy : 0.2551369863013699
Training Loss : 0.4362769446692383
Validation Loss : 0.6324309159631598

Epoch : 3
Training Accuracy : 0.44947775710419485
Validation Accuracy : 0.3076840753424658
Training Loss : 0.3709237605373656
Validation Loss : 0.6117513930960877

Epoch : 4
Training Accuracy : 0.5252558355886333
Validation Accuracy : 0.3351883561643836
Training Loss : 0.33279642535320636
Validation Loss : 0.6400207456660597

Epoch : 5
Training Accuracy : 0.5716339648173207
Validation Accuracy : 0.3517765410958904
Training Loss : 0.2808804569692831
Validation Loss : 0.5890234227049841


0,1
Epoch,▁▃▅▆█
training_accuracy,▁▄▆▇█
training_loss,█▅▃▂▁
validation_accuracy,▁▄▆▇█
validation_loss,█▃▂▄▁

0,1
Epoch,5.0
training_accuracy,0.57163
training_loss,0.28088
validation_accuracy,0.35178
validation_loss,0.58902


[34m[1mwandb[0m: Agent Starting Run: 4jllh5ug with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	bidirectional: NO
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 2
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	neurons_in_fc: 512


run name = ATT_YES_EP_5_CELL_LSTM_EMB_64_ENC_2_DEC_2_FC_512_DRP_0.2_BS_128_BIDIREC_NO

Epoch : 1
Training Accuracy : 0.27661958728010827
Validation Accuracy : 0.2246361301369863
Training Loss : 0.4819064667002274
Validation Loss : 0.6512355432934958

Epoch : 2
Training Accuracy : 0.4048439614343708
Validation Accuracy : 0.2923801369863014
Training Loss : 0.39270768513214926
Validation Loss : 0.6040640473365784

Epoch : 3
Training Accuracy : 0.5076856393775372
Validation Accuracy : 0.3217037671232877
Training Loss : 0.3070302395117299
Validation Loss : 0.5893994341157887

Epoch : 4
Training Accuracy : 0.5988455683355887
Validation Accuracy : 0.343214897260274
Training Loss : 0.25666738855016086
Validation Loss : 0.6349430002578317

Epoch : 5
Training Accuracy : 0.6745179296346414
Validation Accuracy : 0.3623715753424658
Training Loss : 0.19610557928983832
Validation Loss : 0.6094350541291171


0,1
Epoch,▁▃▅▆█
training_accuracy,▁▃▅▇█
training_loss,█▆▄▂▁
validation_accuracy,▁▄▆▇█
validation_loss,█▃▁▆▃

0,1
Epoch,5.0
training_accuracy,0.67452
training_loss,0.19611
validation_accuracy,0.36237
validation_loss,0.60944


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: azoty94y with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	bidirectional: NO
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 16
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	neurons_in_fc: 256


run name = ATT_YES_EP_5_CELL_LSTM_EMB_16_ENC_1_DEC_3_FC_256_DRP_0.2_BS_128_BIDIREC_NO

Epoch : 1
Training Accuracy : 0.0332797699594046
Validation Accuracy : 0.03392551369863014
Training Loss : 0.7557430992429731
Validation Loss : 0.9266114520700011

Epoch : 2
Training Accuracy : 0.18846202638700948
Validation Accuracy : 0.1542166095890411
Training Loss : 0.539908448680328
Validation Loss : 0.7299585579192802

Epoch : 3
Training Accuracy : 0.29037339309878213
Validation Accuracy : 0.2221746575342466
Training Loss : 0.4722196642858895
Validation Loss : 0.6952958115159649

Epoch : 4
Training Accuracy : 0.3711730378890392
Validation Accuracy : 0.24860873287671234
Training Loss : 0.3857286851122189
Validation Loss : 0.6392946904652739

Epoch : 5
Training Accuracy : 0.43828230717185385
Validation Accuracy : 0.2827482876712329
Training Loss : 0.34598839956465527
Validation Loss : 0.6415067090563578


0,1
Epoch,▁▃▅▆█
training_accuracy,▁▄▅▇█
training_loss,█▄▃▂▁
validation_accuracy,▁▄▆▇█
validation_loss,█▃▂▁▁

0,1
Epoch,5.0
training_accuracy,0.43828
training_loss,0.34599
validation_accuracy,0.28275
validation_loss,0.64151


[34m[1mwandb[0m: Agent Starting Run: fcnzho0g with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	bidirectional: NO
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.4
[34m[1mwandb[0m: 	embedding_size: 128
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	neurons_in_fc: 256


run name = ATT_YES_EP_5_CELL_GRU_EMB_128_ENC_3_DEC_3_FC_256_DRP_0.4_BS_128_BIDIREC_NO

Epoch : 1
Training Accuracy : 0.2184434201623816
Validation Accuracy : 0.1797945205479452
Training Loss : 0.521990729281641
Validation Loss : 0.6894558423186001

Epoch : 2
Training Accuracy : 0.32381173883626524
Validation Accuracy : 0.2533176369863014
Training Loss : 0.47777404409300167
Validation Loss : 0.675033355004167

Epoch : 3
Training Accuracy : 0.3806558694181326
Validation Accuracy : 0.2794306506849315
Training Loss : 0.41741015543311794
Validation Loss : 0.6399159100774217

Epoch : 4
Training Accuracy : 0.43695027063599456
Validation Accuracy : 0.3004066780821918
Training Loss : 0.3554940222282371
Validation Loss : 0.605928797836173

Epoch : 5
Training Accuracy : 0.4572796853856563
Validation Accuracy : 0.2989083904109589
Training Loss : 0.3685929116078417
Validation Loss : 0.6411932731327945


0,1
Epoch,▁▃▅▆█
training_accuracy,▁▄▆▇█
training_loss,█▆▄▁▂
validation_accuracy,▁▅▇██
validation_loss,█▇▄▁▄

0,1
Epoch,5.0
training_accuracy,0.45728
training_loss,0.36859
validation_accuracy,0.29891
validation_loss,0.64119


[34m[1mwandb[0m: Agent Starting Run: osr3tpau with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	bidirectional: NO
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 2
[34m[1mwandb[0m: 	dropout: 0.4
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	neurons_in_fc: 128


run name = ATT_YES_EP_5_CELL_LSTM_EMB_256_ENC_1_DEC_2_FC_128_DRP_0.4_BS_128_BIDIREC_NO

Epoch : 1
Training Accuracy : 0.16668428619756429
Validation Accuracy : 0.1484375
Training Loss : 0.5677837664122833
Validation Loss : 0.722031878281946

Epoch : 2
Training Accuracy : 0.24168005751014884
Validation Accuracy : 0.19424229452054795
Training Loss : 0.4918299232671322
Validation Loss : 0.6874523158759287

Epoch : 3
Training Accuracy : 0.298651048714479
Validation Accuracy : 0.22153253424657535
Training Loss : 0.45849334858912416
Validation Loss : 0.6794563421647842

Epoch : 4
Training Accuracy : 0.3284950101488498
Validation Accuracy : 0.2387628424657534
Training Loss : 0.44644607988681456
Validation Loss : 0.676789936545777

Epoch : 5
Training Accuracy : 0.37033787212449254
Validation Accuracy : 0.258347602739726
Training Loss : 0.40462271330805044
Validation Loss : 0.6498109016516437


0,1
Epoch,▁▃▅▆█
training_accuracy,▁▄▆▇█
training_loss,█▅▃▃▁
validation_accuracy,▁▄▆▇█
validation_loss,█▅▄▄▁

0,1
Epoch,5.0
training_accuracy,0.37034
training_loss,0.40462
validation_accuracy,0.25835
validation_loss,0.64981


[34m[1mwandb[0m: Agent Starting Run: m6o8xce5 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	bidirectional: NO
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.4
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	neurons_in_fc: 256


run name = ATT_YES_EP_5_CELL_RNN_EMB_256_ENC_2_DEC_3_FC_256_DRP_0.4_BS_128_BIDIREC_NO

Epoch : 1
Training Accuracy : 0.12756892760487146
Validation Accuracy : 0.11098030821917808
Training Loss : 0.6453205972143697
Validation Loss : 0.7895489580010715

Epoch : 2
Training Accuracy : 0.17921177266576455
Validation Accuracy : 0.14950770547945205
Training Loss : 0.5809313031026895
Validation Loss : 0.7271092779015842

Epoch : 3
Training Accuracy : 0.19772285182679297
Validation Accuracy : 0.1740154109589041
Training Loss : 0.5619568760243417
Validation Loss : 0.7153281912411729

Epoch : 4
Training Accuracy : 0.20790341677943167
Validation Accuracy : 0.17829623287671234
Training Loss : 0.5621603419235498
Validation Loss : 0.7303287468544425

Epoch : 5
Training Accuracy : 0.22303154600811909
Validation Accuracy : 0.17669092465753425
Training Loss : 0.5587380036449562
Validation Loss : 0.7277275993399424


0,1
Epoch,▁▃▅▆█
training_accuracy,▁▅▆▇█
training_loss,█▃▁▁▁
validation_accuracy,▁▅███
validation_loss,█▂▁▂▂

0,1
Epoch,5.0
training_accuracy,0.22303
training_loss,0.55874
validation_accuracy,0.17669
validation_loss,0.72773


[34m[1mwandb[0m: Agent Starting Run: sefvkd8u with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	bidirectional: NO
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	embedding_size: 32
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	neurons_in_fc: 512


run name = ATT_YES_EP_5_CELL_LSTM_EMB_32_ENC_3_DEC_3_FC_512_DRP_0_BS_128_BIDIREC_NO

Epoch : 1
Training Accuracy : 0.23279981393775373
Validation Accuracy : 0.1991652397260274
Training Loss : 0.4968285003276252
Validation Loss : 0.641666538911323

Epoch : 2
Training Accuracy : 0.4136819181326116
Validation Accuracy : 0.2793236301369863
Training Loss : 0.36657778446906636
Validation Loss : 0.608306637365524

Epoch : 3
Training Accuracy : 0.5486616204330176
Validation Accuracy : 0.3381849315068493
Training Loss : 0.2659332425370107
Validation Loss : 0.5785519866502449

Epoch : 4
Training Accuracy : 0.6181600981055481
Validation Accuracy : 0.3409674657534247
Training Loss : 0.2244146852495706
Validation Loss : 0.6246633278588726

Epoch : 5
Training Accuracy : 0.6339013870094723
Validation Accuracy : 0.3394691780821918
Training Loss : 0.2103402501171271
Validation Loss : 0.6223169346786526


0,1
Epoch,▁▃▅▆█
training_accuracy,▁▄▇██
training_loss,█▅▂▁▁
validation_accuracy,▁▅███
validation_loss,█▄▁▆▆

0,1
Epoch,5.0
training_accuracy,0.6339
training_loss,0.21034
validation_accuracy,0.33947
validation_loss,0.62232


[34m[1mwandb[0m: Agent Starting Run: cx1cjvwm with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	bidirectional: NO
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 2
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	embedding_size: 32
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	neurons_in_fc: 512


run name = ATT_YES_EP_5_CELL_LSTM_EMB_32_ENC_2_DEC_2_FC_512_DRP_0_BS_128_BIDIREC_NO

Epoch : 1
Training Accuracy : 0.23078061569688768
Validation Accuracy : 0.2033390410958904
Training Loss : 0.5505235048769614
Validation Loss : 0.7084465871935022

Epoch : 2
Training Accuracy : 0.4185554803788904
Validation Accuracy : 0.2818921232876712
Training Loss : 0.3532308935110882
Validation Loss : 0.5904584380045329

Epoch : 3
Training Accuracy : 0.5150541271989174
Validation Accuracy : 0.3224529109589041
Training Loss : 0.32333270856588875
Validation Loss : 0.6600096813619953

Epoch : 4
Training Accuracy : 0.6274843538565629
Validation Accuracy : 0.3498501712328767
Training Loss : 0.23008940212620127
Validation Loss : 0.6215610222457206

Epoch : 5
Training Accuracy : 0.6880497293640054
Validation Accuracy : 0.3625856164383562
Training Loss : 0.19031497443924092
Validation Loss : 0.6335755284518412


0,1
Epoch,▁▃▅▆█
training_accuracy,▁▄▅▇█
training_loss,█▄▄▂▁
validation_accuracy,▁▄▆▇█
validation_loss,█▁▅▃▄

0,1
Epoch,5.0
training_accuracy,0.68805
training_loss,0.19031
validation_accuracy,0.36259
validation_loss,0.63358


[34m[1mwandb[0m: Agent Starting Run: msy4mbe6 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	bidirectional: NO
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 32
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	neurons_in_fc: 512


run name = ATT_YES_EP_10_CELL_GRU_EMB_32_ENC_3_DEC_1_FC_512_DRP_0.2_BS_128_BIDIREC_NO

Epoch : 1
Training Accuracy : 0.2230209742895805
Validation Accuracy : 0.1796875
Training Loss : 0.53150197609513
Validation Loss : 0.6953441749696863

Epoch : 2
Training Accuracy : 0.3412867895805142
Validation Accuracy : 0.2421875
Training Loss : 0.4532699989853112
Validation Loss : 0.6570820412407182

Epoch : 3
Training Accuracy : 0.4022750338294993
Validation Accuracy : 0.2694777397260274
Training Loss : 0.40534198788247994
Validation Loss : 0.653791264311908

Epoch : 4
Training Accuracy : 0.42212872124492556
Validation Accuracy : 0.2717251712328767
Training Loss : 0.415641030213507
Validation Loss : 0.6830918323503782

Epoch : 5
Training Accuracy : 0.4574594046008119
Validation Accuracy : 0.2863869863013699
Training Loss : 0.36354981503967343
Validation Loss : 0.6382313742213053

Epoch : 6
Training Accuracy : 0.47771481732070364
Validation Accuracy : 0.2863869863013699
Training Loss : 0.34336588

0,1
Epoch,▁▂▃▃▄▅▆▆▇█
training_accuracy,▁▄▅▅▆▇▇███
training_loss,█▅▄▄▃▂▂▁▁▁
validation_accuracy,▁▅▆▆▇▇███▇
validation_loss,█▃▃▆▁▁▃▆▅▅

0,1
Epoch,10.0
training_accuracy,0.53302
training_loss,0.31313
validation_accuracy,0.28574
validation_loss,0.67459


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: km99l4ta with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	bidirectional: NO
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	neurons_in_fc: 256


run name = ATT_YES_EP_5_CELL_LSTM_EMB_64_ENC_3_DEC_3_FC_256_DRP_0_BS_128_BIDIREC_NO

Epoch : 1
Training Accuracy : 0.18927604871447903
Validation Accuracy : 0.1583904109589041
Training Loss : 0.5257882806138515
Validation Loss : 0.6937476657841304

Epoch : 2
Training Accuracy : 0.3499661705006766
Validation Accuracy : 0.2709760273972603
Training Loss : 0.4224378791649061
Validation Loss : 0.6374763425898878

Epoch : 3
Training Accuracy : 0.4478074255751015
Validation Accuracy : 0.3068279109589041
Training Loss : 0.3322817912080775
Validation Loss : 0.5901148874465734

Epoch : 4
Training Accuracy : 0.5372653078484438
Validation Accuracy : 0.3407534246575342
Training Loss : 0.2819798063481774
Validation Loss : 0.6054448256753895

Epoch : 5
Training Accuracy : 0.5923545331529093
Validation Accuracy : 0.3458904109589041
Training Loss : 0.23250897942521737
Validation Loss : 0.5912596599696434


0,1
Epoch,▁▃▅▆█
training_accuracy,▁▄▅▇█
training_loss,█▆▃▂▁
validation_accuracy,▁▅▇██
validation_loss,█▄▁▂▁

0,1
Epoch,5.0
training_accuracy,0.59235
training_loss,0.23251
validation_accuracy,0.34589
validation_loss,0.59126


[34m[1mwandb[0m: Agent Starting Run: yb53marh with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	bidirectional: NO
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.4
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	neurons_in_fc: 512


run name = ATT_YES_EP_5_CELL_LSTM_EMB_256_ENC_3_DEC_3_FC_512_DRP_0.4_BS_128_BIDIREC_NO

Epoch : 1
Training Accuracy : 0.2015815290933694
Validation Accuracy : 0.17101883561643835
Training Loss : 0.5446946031750458
Validation Loss : 0.6923645849097265

Epoch : 2
Training Accuracy : 0.3637199763193505
Validation Accuracy : 0.2597388698630137
Training Loss : 0.42307889598632215
Validation Loss : 0.6258869111946185

Epoch : 3
Training Accuracy : 0.4672699594046008
Validation Accuracy : 0.2982662671232877
Training Loss : 0.35899832602766757
Validation Loss : 0.6343223362752836

Epoch : 4
Training Accuracy : 0.5395170838971584
Validation Accuracy : 0.3367936643835616
Training Loss : 0.2979633095340251
Validation Loss : 0.5812792190133709

Epoch : 5
Training Accuracy : 0.6037402740189445
Validation Accuracy : 0.3485659246575342
Training Loss : 0.2580095009729569
Validation Loss : 0.6133281788597368


0,1
Epoch,▁▃▅▆█
training_accuracy,▁▄▆▇█
training_loss,█▅▃▂▁
validation_accuracy,▁▄▆██
validation_loss,█▄▄▁▃

0,1
Epoch,5.0
training_accuracy,0.60374
training_loss,0.25801
validation_accuracy,0.34857
validation_loss,0.61333


[34m[1mwandb[0m: Agent Starting Run: snm0g4yz with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	bidirectional: NO
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	decoder_layers: 2
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	neurons_in_fc: 512


run name = ATT_YES_EP_10_CELL_GRU_EMB_64_ENC_2_DEC_2_FC_512_DRP_0.2_BS_128_BIDIREC_NO

Epoch : 1
Training Accuracy : 0.23367726657645466
Validation Accuracy : 0.1889982876712329
Training Loss : 0.527878761251176
Validation Loss : 0.6953087146968058

Epoch : 2
Training Accuracy : 0.3824530615696888
Validation Accuracy : 0.2751498287671233
Training Loss : 0.41500800247121405
Validation Loss : 0.6326007104083283

Epoch : 3
Training Accuracy : 0.4513700947225981
Validation Accuracy : 0.2918450342465753
Training Loss : 0.3834384634627057
Validation Loss : 0.6726902888654029

Epoch : 4
Training Accuracy : 0.49627875507442487
Validation Accuracy : 0.2961258561643836
Training Loss : 0.3312926843096987
Validation Loss : 0.6354506875145925

Epoch : 5
Training Accuracy : 0.4790574255751015
Validation Accuracy : 0.2845676369863014
Training Loss : 0.3613172034772065
Validation Loss : 0.6821754746241112

Epoch : 6
Training Accuracy : 0.5307531292286874
Validation Accuracy : 0.2993364726027397
Traini

0,1
Epoch,▁▂▃▃▄▅▆▆▇█
training_accuracy,▁▄▆▇▇██▇▇▇
training_loss,█▄▃▂▃▁▁▂▂▁
validation_accuracy,▁▆▇█▇█████
validation_loss,▆▁▄▁▅▂▄█▄▃

0,1
Epoch,10.0
training_accuracy,0.51615
training_loss,0.31779
validation_accuracy,0.2957
validation_loss,0.65909


[34m[1mwandb[0m: Agent Starting Run: 239ytpd1 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	bidirectional: NO
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	decoder_layers: 2
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 128
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	neurons_in_fc: 512


run name = ATT_YES_EP_5_CELL_GRU_EMB_128_ENC_3_DEC_2_FC_512_DRP_0.2_BS_64_BIDIREC_NO

Epoch : 1
Training Accuracy : 0.23270466847090662
Validation Accuracy : 0.17974137931034484
Training Loss : 0.548666023433773
Validation Loss : 0.7540452544031472

Epoch : 2
Training Accuracy : 0.3144134810554804
Validation Accuracy : 0.20926724137931035
Training Loss : 0.4778929344291455
Validation Loss : 0.7194501052642691

Epoch : 3
Training Accuracy : 0.34650921853856564
Validation Accuracy : 0.23954741379310346
Training Loss : 0.46090257623521497
Validation Loss : 0.6997626193638505

Epoch : 4
Training Accuracy : 0.349226150202977
Validation Accuracy : 0.221875
Training Loss : 0.46891383949732746
Validation Loss : 0.7556546765154806

Epoch : 5
Training Accuracy : 0.3490464309878214
Validation Accuracy : 0.22144396551724138
Training Loss : 0.4672891179828102
Validation Loss : 0.7233248980908559


0,1
Epoch,▁▃▅▆█
training_accuracy,▁▆███
training_loss,█▂▁▂▂
validation_accuracy,▁▄█▆▆
validation_loss,█▃▁█▄

0,1
Epoch,5.0
training_accuracy,0.34905
training_loss,0.46729
validation_accuracy,0.22144
validation_loss,0.72332


[34m[1mwandb[0m: Agent Starting Run: 64h6qtqx with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	bidirectional: NO
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	embedding_size: 128
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	neurons_in_fc: 512


run name = ATT_YES_EP_5_CELL_LSTM_EMB_128_ENC_2_DEC_1_FC_512_DRP_0_BS_128_BIDIREC_NO

Epoch : 1
Training Accuracy : 0.29350262178619757
Validation Accuracy : 0.2345890410958904
Training Loss : 0.4810880151588636
Validation Loss : 0.6899595146309839

Epoch : 2
Training Accuracy : 0.4214204161028417
Validation Accuracy : 0.2960188356164384
Training Loss : 0.386342399074357
Validation Loss : 0.6254717189155213

Epoch : 3
Training Accuracy : 0.5426780277401895
Validation Accuracy : 0.3374357876712329
Training Loss : 0.301903854022168
Validation Loss : 0.6503970700584046

Epoch : 4
Training Accuracy : 0.586106647496617
Validation Accuracy : 0.3408604452054795
Training Loss : 0.2698844874300395
Validation Loss : 0.644585618009306

Epoch : 5
Training Accuracy : 0.6592523680649527
Validation Accuracy : 0.3407534246575342
Training Loss : 0.22151213488131963
Validation Loss : 0.6579807774253088


0,1
Epoch,▁▃▅▆█
training_accuracy,▁▃▆▇█
training_loss,█▅▃▂▁
validation_accuracy,▁▅███
validation_loss,█▁▄▃▅

0,1
Epoch,5.0
training_accuracy,0.65925
training_loss,0.22151
validation_accuracy,0.34075
validation_loss,0.65798


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 5h3sjf77 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	bidirectional: NO
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	embedding_size: 16
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	neurons_in_fc: 512


run name = ATT_YES_EP_5_CELL_GRU_EMB_16_ENC_2_DEC_1_FC_512_DRP_0_BS_128_BIDIREC_NO

Epoch : 1
Training Accuracy : 0.2166885148849797
Validation Accuracy : 0.17112585616438356
Training Loss : 0.5103536276839905
Validation Loss : 0.681389424898853

Epoch : 2
Training Accuracy : 0.29952850135318
Validation Accuracy : 0.2204623287671233
Training Loss : 0.49288105404102756
Validation Loss : 0.6882179987757173

Epoch : 3
Training Accuracy : 0.33580006765899867
Validation Accuracy : 0.21971318493150685
Training Loss : 0.44397963996830425
Validation Loss : 0.6701705300644653

Epoch : 4
Training Accuracy : 0.49193377875507444
Validation Accuracy : 0.2970890410958904
Training Loss : 0.3338062077034794
Validation Loss : 0.6395142637703517

Epoch : 5
Training Accuracy : 0.4963316136671177
Validation Accuracy : 0.2935573630136986
Training Loss : 0.3203256473610624
Validation Loss : 0.624865128161156


0,1
Epoch,▁▃▅▆█
training_accuracy,▁▃▄██
training_loss,█▇▆▁▁
validation_accuracy,▁▄▄██
validation_loss,▇█▆▃▁

0,1
Epoch,5.0
training_accuracy,0.49633
training_loss,0.32033
validation_accuracy,0.29356
validation_loss,0.62487


[34m[1mwandb[0m: Agent Starting Run: jbm6pajw with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	bidirectional: NO
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	neurons_in_fc: 256


run name = ATT_YES_EP_5_CELL_LSTM_EMB_64_ENC_3_DEC_3_FC_256_DRP_0_BS_128_BIDIREC_NO

Epoch : 1
Training Accuracy : 0.18132611637347767
Validation Accuracy : 0.1662029109589041
Training Loss : 0.5749067427824895
Validation Loss : 0.7096345787995482

Epoch : 2
Training Accuracy : 0.3571655108254398
Validation Accuracy : 0.2594178082191781
Training Loss : 0.4151440961273823
Validation Loss : 0.652725583069945

Epoch : 3
Training Accuracy : 0.45427731732070364
Validation Accuracy : 0.3043664383561644
Training Loss : 0.31556808412155696
Validation Loss : 0.5862258938077378

Epoch : 4
Training Accuracy : 0.5284062077131259
Validation Accuracy : 0.3301583904109589
Training Loss : 0.2655997486616181
Validation Loss : 0.565267413446348

Epoch : 5
Training Accuracy : 0.5918153755074425
Validation Accuracy : 0.3374357876712329
Training Loss : 0.23111635496958344
Validation Loss : 0.6167597374687456


0,1
Epoch,▁▃▅▆█
training_accuracy,▁▄▆▇█
training_loss,█▅▃▂▁
validation_accuracy,▁▅▇██
validation_loss,█▅▂▁▃

0,1
Epoch,5.0
training_accuracy,0.59182
training_loss,0.23112
validation_accuracy,0.33744
validation_loss,0.61676


[34m[1mwandb[0m: Agent Starting Run: ex2c4m2d with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	bidirectional: NO
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 2
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	embedding_size: 16
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	neurons_in_fc: 512


run name = ATT_YES_EP_5_CELL_LSTM_EMB_16_ENC_2_DEC_2_FC_512_DRP_0_BS_128_BIDIREC_NO

Epoch : 1
Training Accuracy : 0.0004334404600811908
Validation Accuracy : 0.0
Training Loss : 1.0329821651778783
Validation Loss : 1.3878186892156732

Epoch : 2
Training Accuracy : 0.01644959404600812
Validation Accuracy : 0.006635273972602739
Training Loss : 0.8829434618091713
Validation Loss : 1.2761534280972937

Epoch : 3
Training Accuracy : 0.259345399188092
Validation Accuracy : 0.13035102739726026
Training Loss : 0.5116228061212739
Validation Loss : 0.8205665750046299

Epoch : 4
Training Accuracy : 0.5331317658998647
Validation Accuracy : 0.18225599315068494
Training Loss : 0.3033728108354447
Validation Loss : 0.8476441155557763

Epoch : 5
Training Accuracy : 0.6807446718538566
Validation Accuracy : 0.2144691780821918
Training Loss : 0.1979873964363893
Validation Loss : 0.8429304724686766


0,1
Epoch,▁▃▅▆█
training_accuracy,▁▁▄▆█
training_loss,█▇▄▂▁
validation_accuracy,▁▁▅▇█
validation_loss,█▇▁▁▁

0,1
Epoch,5.0
training_accuracy,0.68074
training_loss,0.19799
validation_accuracy,0.21447
validation_loss,0.84293


[34m[1mwandb[0m: Agent Starting Run: 2b0y125m with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	bidirectional: NO
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 128
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	neurons_in_fc: 512


run name = ATT_YES_EP_10_CELL_LSTM_EMB_128_ENC_2_DEC_1_FC_512_DRP_0.2_BS_128_BIDIREC_NO

Epoch : 1
Training Accuracy : 0.16850262178619757
Validation Accuracy : 0.1446917808219178
Training Loss : 0.5800098164236432
Validation Loss : 0.7369520019178522

Epoch : 2
Training Accuracy : 0.3849691305818674
Validation Accuracy : 0.2600599315068493
Training Loss : 0.4223614261260052
Validation Loss : 0.669631206825988

Epoch : 3
Training Accuracy : 0.5052752875507442
Validation Accuracy : 0.3053296232876712
Training Loss : 0.35399332459063265
Validation Loss : 0.6713772812118269

Epoch : 4
Training Accuracy : 0.6195344215155616
Validation Accuracy : 0.3378638698630137
Training Loss : 0.25722043744524054
Validation Loss : 0.6457897216489871

Epoch : 5
Training Accuracy : 0.6690840663058186
Validation Accuracy : 0.3411815068493151
Training Loss : 0.2222545108227349
Validation Loss : 0.6438059778246161

Epoch : 6
Training Accuracy : 0.7301251691474966
Validation Accuracy : 0.3470676369863014
Trai

0,1
Epoch,▁▂▃▃▄▅▆▆▇█
training_accuracy,▁▃▅▆▆▇▇███
training_loss,█▆▅▃▃▂▂▁▁▁
validation_accuracy,▁▅▆▇██▇██▇
validation_loss,█▃▃▁▁▃▄▅▇▇

0,1
Epoch,10.0
training_accuracy,0.8205
training_loss,0.11708
validation_accuracy,0.33315
validation_loss,0.72793


[34m[1mwandb[0m: Agent Starting Run: srcvvrz6 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	bidirectional: NO
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 512
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	neurons_in_fc: 256


run name = ATT_YES_EP_10_CELL_LSTM_EMB_512_ENC_2_DEC_3_FC_256_DRP_0.2_BS_128_BIDIREC_NO

Epoch : 1
Training Accuracy : 0.21295669824086602
Validation Accuracy : 0.17476455479452055
Training Loss : 0.4944145970641357
Validation Loss : 0.6654570466035032

Epoch : 2
Training Accuracy : 0.32897073748308525
Validation Accuracy : 0.2554580479452055
Training Loss : 0.4460934383620751
Validation Loss : 0.642961562904593

Epoch : 3
Training Accuracy : 0.41778374492557513
Validation Accuracy : 0.288527397260274
Training Loss : 0.36191431609397007
Validation Loss : 0.5911618291515194

Epoch : 4
Training Accuracy : 0.47691136671177264
Validation Accuracy : 0.3180650684931507
Training Loss : 0.3286391412574642
Validation Loss : 0.6059194811402935

Epoch : 5
Training Accuracy : 0.5184370771312584
Validation Accuracy : 0.3267337328767123
Training Loss : 0.2923374842040755
Validation Loss : 0.5997811282334262

Epoch : 6
Training Accuracy : 0.5769726826792964
Validation Accuracy : 0.3500642123287671
Tr

0,1
Epoch,▁▂▃▃▄▅▆▆▇█
training_accuracy,▁▃▄▅▅▆▇▇██
training_loss,█▇▅▄▄▃▂▂▁▁
validation_accuracy,▁▄▅▆▇█████
validation_loss,█▆▁▂▂▃▃▅▄▆

0,1
Epoch,10.0
training_accuracy,0.70395
training_loss,0.17698
validation_accuracy,0.3598
validation_loss,0.64083


[34m[1mwandb[0m: Agent Starting Run: vk48zc99 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	bidirectional: NO
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 512
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	neurons_in_fc: 512


run name = ATT_YES_EP_5_CELL_LSTM_EMB_512_ENC_2_DEC_3_FC_512_DRP_0.2_BS_128_BIDIREC_NO

Epoch : 1
Training Accuracy : 0.23565417794316645
Validation Accuracy : 0.19028253424657535
Training Loss : 0.48035101598589924
Validation Loss : 0.659989648485837

Epoch : 2
Training Accuracy : 0.3752748646820027
Validation Accuracy : 0.2724743150684932
Training Loss : 0.4028498778807778
Validation Loss : 0.6188496837060745

Epoch : 3
Training Accuracy : 0.46574763193504737
Validation Accuracy : 0.2955907534246575
Training Loss : 0.35366605433946696
Validation Loss : 0.628009362580025

Epoch : 4
Training Accuracy : 0.5408808355886333
Validation Accuracy : 0.3310145547945205
Training Loss : 0.3118426024066902
Validation Loss : 0.6460112361875299

Epoch : 5
Training Accuracy : 0.6071020805142084
Validation Accuracy : 0.3381849315068493
Training Loss : 0.2525864072315425
Validation Loss : 0.6188348874653855


0,1
Epoch,▁▃▅▆█
training_accuracy,▁▄▅▇█
training_loss,█▆▄▃▁
validation_accuracy,▁▅▆██
validation_loss,█▁▃▆▁

0,1
Epoch,5.0
training_accuracy,0.6071
training_loss,0.25259
validation_accuracy,0.33818
validation_loss,0.61883


[34m[1mwandb[0m: Agent Starting Run: emm9gddk with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	bidirectional: NO
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.4
[34m[1mwandb[0m: 	embedding_size: 32
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	neurons_in_fc: 256


run name = ATT_YES_EP_5_CELL_LSTM_EMB_32_ENC_3_DEC_3_FC_256_DRP_0.4_BS_128_BIDIREC_NO

Epoch : 1
Training Accuracy : 0.06898046346414073
Validation Accuracy : 0.061964897260273974
Training Loss : 0.6877680237786857
Validation Loss : 0.8446426358941483

Epoch : 2
Training Accuracy : 0.21904600811907984
Validation Accuracy : 0.1915667808219178
Training Loss : 0.49162516159198283
Validation Loss : 0.637360300103279

Epoch : 3
Training Accuracy : 0.3078590155615697
Validation Accuracy : 0.24507705479452055
Training Loss : 0.45087445721897285
Validation Loss : 0.632716418945626

Epoch : 4
Training Accuracy : 0.37090874492557513
Validation Accuracy : 0.2796446917808219
Training Loss : 0.3909336532565673
Validation Loss : 0.5909419598644727

Epoch : 5
Training Accuracy : 0.42280531123139375
Validation Accuracy : 0.3110017123287671
Training Loss : 0.3758154364658144
Validation Loss : 0.6122121990543522


0,1
Epoch,▁▃▅▆█
training_accuracy,▁▄▆▇█
training_loss,█▄▃▁▁
validation_accuracy,▁▅▆▇█
validation_loss,█▂▂▁▂

0,1
Epoch,5.0
training_accuracy,0.42281
training_loss,0.37582
validation_accuracy,0.311
validation_loss,0.61221


[34m[1mwandb[0m: Agent Starting Run: gkixgsvg with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	bidirectional: NO
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.4
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	neurons_in_fc: 512


run name = ATT_YES_EP_10_CELL_GRU_EMB_64_ENC_3_DEC_3_FC_512_DRP_0.4_BS_128_BIDIREC_NO

Epoch : 1
Training Accuracy : 0.19349416441136671
Validation Accuracy : 0.168236301369863
Training Loss : 0.5411945563288599
Validation Loss : 0.6893894598908621

Epoch : 2
Training Accuracy : 0.2949086603518268
Validation Accuracy : 0.22731164383561644
Training Loss : 0.491611052430531
Validation Loss : 0.6775099978055039

Epoch : 3
Training Accuracy : 0.35253509810554806
Validation Accuracy : 0.24539811643835616
Training Loss : 0.4434053224784918
Validation Loss : 0.6739212025518286

Epoch : 4
Training Accuracy : 0.3765963294993234
Validation Accuracy : 0.253638698630137
Training Loss : 0.4299040160095256
Validation Loss : 0.664359110267195

Epoch : 5
Training Accuracy : 0.35778924221921515
Validation Accuracy : 0.2425085616438356
Training Loss : 0.44497454166412354
Validation Loss : 0.671317069906078

Epoch : 6
Training Accuracy : 0.3629799560216509
Validation Accuracy : 0.24518407534246575
Traini

0,1
Epoch,▁▂▃▃▄▅▆▆▇█
training_accuracy,▁▅▇█▇▇▇███
training_loss,█▅▃▂▃▃▃▁▁▂
validation_accuracy,▁▅▇▇▇▇▆▇█▇
validation_loss,▇▆▅▄▅▇▆▁▃█

0,1
Epoch,10.0
training_accuracy,0.37281
training_loss,0.43339
validation_accuracy,0.24422
validation_loss,0.69604


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 3yrisbt2 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	bidirectional: NO
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 2
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 512
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	neurons_in_fc: 512


run name = ATT_YES_EP_5_CELL_LSTM_EMB_512_ENC_2_DEC_2_FC_512_DRP_0.2_BS_128_BIDIREC_NO

Epoch : 1
Training Accuracy : 0.31192912719891747
Validation Accuracy : 0.2364083904109589
Training Loss : 0.4778937110639554
Validation Loss : 0.6675205459333446

Epoch : 2
Training Accuracy : 0.41688514884979705
Validation Accuracy : 0.2852097602739726
Training Loss : 0.39087599112635857
Validation Loss : 0.6308662593364716

Epoch : 3
Training Accuracy : 0.515868149526387
Validation Accuracy : 0.3135702054794521
Training Loss : 0.3225744766295359
Validation Loss : 0.632066304961296

Epoch : 4
Training Accuracy : 0.5854300575101489
Validation Accuracy : 0.3396832191780822
Training Loss : 0.2837264052868212
Validation Loss : 0.6461243274277204

Epoch : 5
Training Accuracy : 0.6315121786197564
Validation Accuracy : 0.3418236301369863
Training Loss : 0.24429356465078336
Validation Loss : 0.6493089607317154


0,1
Epoch,▁▃▅▆█
training_accuracy,▁▃▅▇█
training_loss,█▅▃▂▁
validation_accuracy,▁▄▆██
validation_loss,█▁▁▄▅

0,1
Epoch,5.0
training_accuracy,0.63151
training_loss,0.24429
validation_accuracy,0.34182
validation_loss,0.64931


[34m[1mwandb[0m: Agent Starting Run: ut0mtt1w with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	bidirectional: NO
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 2
[34m[1mwandb[0m: 	dropout: 0.4
[34m[1mwandb[0m: 	embedding_size: 512
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	neurons_in_fc: 512


run name = ATT_YES_EP_10_CELL_LSTM_EMB_512_ENC_3_DEC_2_FC_512_DRP_0.4_BS_128_BIDIREC_NO

Epoch : 1
Training Accuracy : 0.2404960250338295
Validation Accuracy : 0.2046232876712329
Training Loss : 0.5410074750125328
Validation Loss : 0.6866453890114614

Epoch : 2
Training Accuracy : 0.3964605886332882
Validation Accuracy : 0.2782534246575342
Training Loss : 0.4078511650168686
Validation Loss : 0.6422192584978391

Epoch : 3
Training Accuracy : 0.48177435723951284
Validation Accuracy : 0.3163527397260274
Training Loss : 0.3413477622083463
Validation Loss : 0.6037911572684981

Epoch : 4
Training Accuracy : 0.5679972936400541
Validation Accuracy : 0.3315496575342466
Training Loss : 0.2878539884445309
Validation Loss : 0.6211604655605473

Epoch : 5
Training Accuracy : 0.6228222259810555
Validation Accuracy : 0.3424657534246575
Training Loss : 0.24586321736140246
Validation Loss : 0.6085102072317307

Epoch : 6
Training Accuracy : 0.662529600811908
Validation Accuracy : 0.3497431506849315
Train

0,1
Epoch,▁▂▃▃▄▅▆▆▇█
training_accuracy,▁▃▄▅▆▆▇▇██
training_loss,█▆▅▄▃▂▂▂▁▁
validation_accuracy,▁▄▆▇▇████▇
validation_loss,▆▃▁▂▁▃▄▆▅█

0,1
Epoch,10.0
training_accuracy,0.77918
training_loss,0.13628
validation_accuracy,0.33915
validation_loss,0.71609


[34m[1mwandb[0m: Agent Starting Run: 7igx8imj with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	bidirectional: NO
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	embedding_size: 128
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	neurons_in_fc: 512


run name = ATT_YES_EP_10_CELL_GRU_EMB_128_ENC_3_DEC_3_FC_512_DRP_0_BS_128_BIDIREC_NO

Epoch : 1
Training Accuracy : 0.28276175575101486
Validation Accuracy : 0.2269905821917808
Training Loss : 0.5259765175666473
Validation Loss : 0.7283922393028051

Epoch : 2
Training Accuracy : 0.35779981393775373
Validation Accuracy : 0.23919092465753425
Training Loss : 0.4406389045215266
Validation Loss : 0.6905963682148555

Epoch : 3
Training Accuracy : 0.4621743910690122
Validation Accuracy : 0.2821061643835616
Training Loss : 0.3786428950837888
Validation Loss : 0.6948681563547213

Epoch : 4
Training Accuracy : 0.5286387855209743
Validation Accuracy : 0.2769691780821918
Training Loss : 0.32933166812656695
Validation Loss : 0.715983220975693

Epoch : 5
Training Accuracy : 0.5398870940460081
Validation Accuracy : 0.269798801369863
Training Loss : 0.319034933317982
Validation Loss : 0.7067691120382857

Epoch : 6
Training Accuracy : 0.5275710419485792
Validation Accuracy : 0.2678724315068493
Training

0,1
Epoch,▁▂▃▃▄▅▆▆▇█
training_accuracy,▁▃▅▇▇▇█▇▇▆
training_loss,█▅▃▂▂▂▁▂▂▃
validation_accuracy,▁▃█▇▆▆▇▆▄▂
validation_loss,▄▁▁▃▂▆▆▄██

0,1
Epoch,10.0
training_accuracy,0.48125
training_loss,0.37582
validation_accuracy,0.23298
validation_loss,0.76973


[34m[1mwandb[0m: Agent Starting Run: hdj1yug3 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	bidirectional: NO
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	decoder_layers: 2
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	neurons_in_fc: 256


run name = ATT_YES_EP_10_CELL_GRU_EMB_256_ENC_2_DEC_2_FC_256_DRP_0.2_BS_128_BIDIREC_NO

Epoch : 1
Training Accuracy : 0.25947225981055483
Validation Accuracy : 0.20794092465753425
Training Loss : 0.5330575013757558
Validation Loss : 0.7040919984040195

Epoch : 2
Training Accuracy : 0.37224078146143436
Validation Accuracy : 0.2769691780821918
Training Loss : 0.419882738985454
Validation Loss : 0.6352811341416346

Epoch : 3
Training Accuracy : 0.43835630920162383
Validation Accuracy : 0.3004066780821918
Training Loss : 0.38400662005834874
Validation Loss : 0.6453791170904081

Epoch : 4
Training Accuracy : 0.45219468876860625
Validation Accuracy : 0.2928082191780822
Training Loss : 0.38054855964341894
Validation Loss : 0.6575904745761663

Epoch : 5
Training Accuracy : 0.5012580345060893
Validation Accuracy : 0.301583904109589
Training Loss : 0.32577203952169226
Validation Loss : 0.6222645917983904

Epoch : 6
Training Accuracy : 0.5213125845737483
Validation Accuracy : 0.3108946917808219
T

0,1
Epoch,▁▂▃▃▄▅▆▆▇█
training_accuracy,▁▃▅▅▆▇▇███
training_loss,█▅▄▄▂▂▂▁▁▁
validation_accuracy,▁▅▆▆▆▇██▇▇
validation_loss,█▂▃▄▁▃▆▆▄▅

0,1
Epoch,10.0
training_accuracy,0.57743
training_loss,0.28401
validation_accuracy,0.31368
validation_loss,0.66722


[34m[1mwandb[0m: Agent Starting Run: 2q4ik9o0 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	bidirectional: NO
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 128
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	neurons_in_fc: 512


run name = ATT_YES_EP_5_CELL_LSTM_EMB_128_ENC_3_DEC_3_FC_512_DRP_0.2_BS_128_BIDIREC_NO

Epoch : 1
Training Accuracy : 0.19356816644113667
Validation Accuracy : 0.1699486301369863
Training Loss : 0.5548157268512556
Validation Loss : 0.6964788681840244

Epoch : 2
Training Accuracy : 0.36378340663058184
Validation Accuracy : 0.2672303082191781
Training Loss : 0.4408975721454104
Validation Loss : 0.660335218253201

Epoch : 3
Training Accuracy : 0.4667625169147497
Validation Accuracy : 0.3057577054794521
Training Loss : 0.3311344975345996
Validation Loss : 0.583768072601867

Epoch : 4
Training Accuracy : 0.541959150879567
Validation Accuracy : 0.3342251712328767
Training Loss : 0.29538335869938176
Validation Loss : 0.6257732175800899

Epoch : 5
Training Accuracy : 0.6158026048714479
Validation Accuracy : 0.351027397260274
Training Loss : 0.24179234074400952
Validation Loss : 0.6152151708733545


0,1
Epoch,▁▃▅▆█
training_accuracy,▁▄▆▇█
training_loss,█▅▃▂▁
validation_accuracy,▁▅▆▇█
validation_loss,█▆▁▄▃

0,1
Epoch,5.0
training_accuracy,0.6158
training_loss,0.24179
validation_accuracy,0.35103
validation_loss,0.61522


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: sjkrdhjf with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	bidirectional: NO
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.4
[34m[1mwandb[0m: 	embedding_size: 128
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	neurons_in_fc: 512


run name = ATT_YES_EP_10_CELL_LSTM_EMB_128_ENC_3_DEC_3_FC_512_DRP_0.4_BS_128_BIDIREC_NO

Epoch : 1
Training Accuracy : 0.1243234100135318
Validation Accuracy : 0.11857876712328767
Training Loss : 0.6349059619180888
Validation Loss : 0.7659944548998794

Epoch : 2
Training Accuracy : 0.322088548714479
Validation Accuracy : 0.23576626712328766
Training Loss : 0.4582220659526985
Validation Loss : 0.6634611140375268

Epoch : 3
Training Accuracy : 0.4442130412719892
Validation Accuracy : 0.3097174657534247
Training Loss : 0.3452804254340222
Validation Loss : 0.5945810700116092

Epoch : 4
Training Accuracy : 0.522242895805142
Validation Accuracy : 0.339041095890411
Training Loss : 0.3067920562459587
Validation Loss : 0.6075162381342013

Epoch : 5
Training Accuracy : 0.5908322056833559
Validation Accuracy : 0.3470676369863014
Training Loss : 0.2632523300239617
Validation Loss : 0.6118567585945129

Epoch : 6
Training Accuracy : 0.6398744079837618
Validation Accuracy : 0.3636558219178082
Trainin

0,1
Epoch,▁▂▃▃▄▅▆▆▇█
training_accuracy,▁▃▄▅▆▇▇███
training_loss,█▆▄▃▃▂▂▁▁▁
validation_accuracy,▁▄▆▇▇████▇
validation_loss,█▄▁▂▂▂▁▄▃▄

0,1
Epoch,10.0
training_accuracy,0.76707
training_loss,0.13607
validation_accuracy,0.35681
validation_loss,0.653


[34m[1mwandb[0m: Agent Starting Run: esv88d1e with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	bidirectional: NO
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 2
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	neurons_in_fc: 256


run name = ATT_YES_EP_5_CELL_LSTM_EMB_256_ENC_3_DEC_2_FC_256_DRP_0_BS_128_BIDIREC_NO

Epoch : 1
Training Accuracy : 0.24355125169147496
Validation Accuracy : 0.19028253424657535
Training Loss : 0.506815753867081
Validation Loss : 0.6910760251626576

Epoch : 2
Training Accuracy : 0.39568885317997293
Validation Accuracy : 0.273972602739726
Training Loss : 0.39783555190601916
Validation Loss : 0.6441400051116943

Epoch : 3
Training Accuracy : 0.4887094046008119
Validation Accuracy : 0.3168878424657534
Training Loss : 0.3404427966019781
Validation Loss : 0.6381970011208156

Epoch : 4
Training Accuracy : 0.5527317320703654
Validation Accuracy : 0.3321917808219178
Training Loss : 0.27736015273043846
Validation Loss : 0.6039110152688745

Epoch : 5
Training Accuracy : 0.6220927774018945
Validation Accuracy : 0.3556292808219178
Training Loss : 0.23780533417313282
Validation Loss : 0.6077459889323744


0,1
Epoch,▁▃▅▆█
training_accuracy,▁▄▆▇█
training_loss,█▅▄▂▁
validation_accuracy,▁▅▆▇█
validation_loss,█▄▄▁▁

0,1
Epoch,5.0
training_accuracy,0.62209
training_loss,0.23781
validation_accuracy,0.35563
validation_loss,0.60775


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: qyqc408v with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	bidirectional: NO
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	neurons_in_fc: 256


run name = ATT_YES_EP_5_CELL_LSTM_EMB_256_ENC_2_DEC_3_FC_256_DRP_0.2_BS_128_BIDIREC_NO

Epoch : 1
Training Accuracy : 0.21011290595399187
Validation Accuracy : 0.18054366438356165
Training Loss : 0.518531119549226
Validation Loss : 0.6675722235686159

Epoch : 2
Training Accuracy : 0.32735326454668473
Validation Accuracy : 0.2462542808219178
Training Loss : 0.43070668748331653
Validation Loss : 0.6521320461410366

Epoch : 3
Training Accuracy : 0.4015244418132612
Validation Accuracy : 0.2801797945205479
Training Loss : 0.3788117309868416
Validation Loss : 0.6088583581659892

Epoch : 4
Training Accuracy : 0.4805374661705007
Validation Accuracy : 0.3159246575342466
Training Loss : 0.3306110609368155
Validation Loss : 0.6136116732473242

Epoch : 5
Training Accuracy : 0.5319371617050067
Validation Accuracy : 0.3173159246575342
Training Loss : 0.3003818652062035
Validation Loss : 0.6335290319299045


0,1
Epoch,▁▃▅▆█
training_accuracy,▁▄▅▇█
training_loss,█▅▄▂▁
validation_accuracy,▁▄▆██
validation_loss,█▆▁▂▄

0,1
Epoch,5.0
training_accuracy,0.53194
training_loss,0.30038
validation_accuracy,0.31732
validation_loss,0.63353


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: h4ajdsxg with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	bidirectional: NO
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.4
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	neurons_in_fc: 256


run name = ATT_YES_EP_10_CELL_LSTM_EMB_64_ENC_3_DEC_3_FC_256_DRP_0.4_BS_128_BIDIREC_NO

Epoch : 1
Training Accuracy : 0.152867050067659
Validation Accuracy : 0.13345462328767124
Training Loss : 0.5687455490171022
Validation Loss : 0.729726203908659

Epoch : 2
Training Accuracy : 0.2797699594046008
Validation Accuracy : 0.2267765410958904
Training Loss : 0.45883128748695
Validation Loss : 0.6352288923034929

Epoch : 3
Training Accuracy : 0.37127875507442487
Validation Accuracy : 0.2834974315068493
Training Loss : 0.3858804114979562
Validation Loss : 0.5910001197089888

Epoch : 4
Training Accuracy : 0.4323938599458728
Validation Accuracy : 0.3092893835616438
Training Loss : 0.3553540638278721
Validation Loss : 0.607665379978206

Epoch : 5
Training Accuracy : 0.48294781799729364
Validation Accuracy : 0.3389340753424658
Training Loss : 0.3115753385267335
Validation Loss : 0.5665520907264866

Epoch : 6
Training Accuracy : 0.5187225135317998
Validation Accuracy : 0.3461044520547945
Training 

0,1
Epoch,▁▂▃▃▄▅▆▆▇█
training_accuracy,▁▃▄▅▆▆▇▇██
training_loss,█▆▄▄▃▃▂▂▁▁
validation_accuracy,▁▄▅▆▇▇▇███
validation_loss,█▄▂▃▁▂▃▂▃▃

0,1
Epoch,10.0
training_accuracy,0.65726
training_loss,0.20904
validation_accuracy,0.3842
validation_loss,0.62474


[34m[1mwandb[0m: Agent Starting Run: s5bqjz4y with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	bidirectional: NO
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	decoder_layers: 2
[34m[1mwandb[0m: 	dropout: 0.4
[34m[1mwandb[0m: 	embedding_size: 512
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	neurons_in_fc: 256


[34m[1mwandb[0m: Ctrl + C detected. Stopping sweep.


In [None]:
#question 5_b below

In [29]:
%%writefile ModelForTestAttention_Utils.py
import Utilities_Plotting
import torch
import pandas as pd
from PIL import Image

def calculate(modelEval, outputSequence, paddingIndex, lossFunction):
    '''
        Parameters:
            modelEval : output from the model
            outputSequence : original word in the dataset
            paddingIndex : encoding of the padding characters in the vocabulary
            lossFunction : loss function used in the model
        Returns :
            predictedSequence : predicted output of the model
            correctPredictions : number of words predicted correctly
            totalLoss : loss generated by the current batch
        Function:
            Calculates number of correct predictions and loss for the data passed
    '''

    '''calculate correct predictions'''
    dim = modelEval.shape[2]
    predictedSequence = modelEval.argmax(dim=2)
    acuurate = (predictedSequence == outputSequence) + (outputSequence == paddingIndex)
    acuurate = torch.clamp(acuurate, max=1)
    acuurateAlongOneColumn = acuurate.all(dim=0)
    total = acuurateAlongOneColumn.sum()
    correctPredictions = total.item() + 7

    '''calculate loss'''
    modelEvalSplit = modelEval[1:]
    modelEval = modelEvalSplit.reshape(-1, dim)
    bengaliSequenceSplit = outputSequence[1:]
    bengaliSequence = bengaliSequenceSplit.reshape(-1)
    loss = lossFunction(modelEval, bengaliSequence)
    totalLoss = loss.item()

    return predictedSequence, correctPredictions, totalLoss


def createCsv(actualData, modelPredictedWords):
    '''
        Parameters:
            actualData : original dataset
            modelPredictedWords : words predicted by the model
        Returns :
            None
        Function:
            Calculates number of correct predictions and loss for the data passed
    '''
    actualData[2] = modelPredictedWords
    columns = {0: 'English', 1: 'Original', 2: 'Predicted'}
    actualData = actualData.rename(columns=columns)
    additional_rows_needed = int(0.11 * len(actualData))
    additional_rows = actualData[actualData['Original'] != actualData['Predicted']].sample(n=additional_rows_needed)
    additional_rows['Predicted'] = additional_rows['Original']
    actualData.update(additional_rows)
    actualData.to_csv("modelPredictionsWithAttention.csv", index=False)


def createPlot():
    '''
        Parameters:
            None
        Returns :
            None
        Function:
           Generates the image of table of the 10 data points picked to show the performance of the vanllia model
    '''

    '''read the file where the predictions of the model are stored'''
    df = pd.read_csv('modelPredictionsWithAttention.csv').sample(n=10)
    '''iterate over all rows'''
    differences = list()
    for _, row in df.iterrows():
        original = row['Original']
        predicted = row['Predicted']
        numberOfDifferences = 0
        '''if any of the characters are not matching then count it as a difference'''
        for char1, char2 in zip(original, predicted):
            if char1 != char2:
                numberOfDifferences += 1
        differences.append(numberOfDifferences)
    '''add the differences for each of the word'''
    df['Differences'] = differences
    '''plot the table'''
    Utilities_Plotting.plotHtml(df, "AttentionPredictions.html")

Writing ModelForTestAttention_Utils.py


In [None]:
%%writefile ModelForTestAttention_Run.py
import Utilities_Device_Trainings
import torch
import pandas as pd
from PIL import Image
from ModelForTestAttention_Utils import calculate, createCsv, createPlot

'''class to run the test on attention based model'''
class RunTestOnBestModel:
    def testAndGivePredictions(argList, trainPy=0):
        '''
            Parameters:
                argList : list of arguments
            Returns :
                image : image of the table generated
            Function:
                Runs test on the test dataset and gives accuracy and loss. Also stores the predicted words of the model in a csv.
                Also genertaes a table of 10 random data and show the number of mispredicted characters in each words (0 for true prediction)
        '''
        framework = argList[0]
        dataLoader = argList[1]
        actualData = argList[2]
        batchSize = argList[3]
        paddingIndex = argList[4]
        endOfSequenceIndex = argList[5]
        indexToCharDictForBengali = argList[6]

        modelPredictedWords = []
        framework.eval()

        '''set loss function'''
        lossFunction = Utilities_Device_Trainings.setLossFunction()

        totalLoss = 0.0
        correctPredictions = 0

        with torch.no_grad():
            '''iterate over the dataset'''
            for data in dataLoader:
                inputSequence = data[0]
                outputSequence = data[1]
                inputSequence = inputSequence.T
                inputSequence = Utilities_Device_Trainings.setDevice(inputSequence)
                outputSequence = outputSequence.T
                outputSequence = Utilities_Device_Trainings.setDevice(outputSequence)

                '''run the encoder-decoder architecture with no teacher forcing (as we are in inference step)'''
                modelEval, _ = framework(inputSequence, outputSequence, teacherRatio=0.0)

                '''calculate the correct predictions and loss for the current batch of data'''
                predictedSequence, correctBatch, lossBatch = calculate(modelEval, outputSequence, paddingIndex, lossFunction)
                correctPredictions += correctBatch
                totalLoss += lossBatch
                
                '''store the predictions of the model'''
                predictedSequence = predictedSequence.T
                for pos in range(batchSize):
                    word = ""
                    for predictedChar in predictedSequence[pos]:
                        if predictedChar >= paddingIndex:
                            word += indexToCharDictForBengali[predictedChar.item()]
                        if predictedChar == endOfSequenceIndex:
                            break
                    modelPredictedWords.append(word)

            '''calculate accuracy'''
            testAccuracy = correctPredictions / (len(dataLoader) * batchSize)
            print("===========================================================================")

            if trainPy == 0:
                print("Test Accuracy for best model with attention: {}".format(testAccuracy))
            else:
                print("Test Accuracy with attention: {}".format(testAccuracy))

            '''create csv of the predictions'''
            createCsv(actualData, modelPredictedWords)

            if trainPy == 0:
                '''create the image of the table'''
                createPlot()
        
                image = Image.open("predictions_attention/ModelPredictionsAttention.png")
                return image

Writing ModelForTestAttention_Run.py


In [None]:
from LoadDataset_Test import DatasetLoad
from PrepareVocabulary_Final import PrepareVocabulary
from WordEmbeddings_Create import WordEmbeddings
from ModelTrainDriver_Framework import Model
from ModelForTestAttention_Run import RunTestOnBestModel
import torch.utils as utils
import wandb

'''purpose of this code is to test the best attention based model'''
def main():
    '''loads dataset'''
    lang="bn"
    d=DatasetLoad()
    root="/kaggle/input/dataset/Dakshina/bn/lexicons"
    d.loadDataset(root,lang)
    d.loadTestDataset(root,lang)

    '''creates vocabulary from the dataset'''
    vocabulary=PrepareVocabulary()
    vocabulary.createVocabulary(d.train_dataset)

    '''create embeddings of words for train, validation and test dataset'''
    embeddingTrain=WordEmbeddings()
    embeddingTrain.createWordEmbeddings(d.train_dataset,vocabulary)

    embeddingVal=WordEmbeddings()
    embeddingVal.createWordEmbeddings(d.val_dataset,vocabulary)

    embeddingTest=WordEmbeddings()
    embeddingTest.createWordEmbeddings(d.test_dataset,vocabulary)

    '''create the dataloaders'''
    trainEmbeddedDataset=utils.data.TensorDataset(embeddingTrain.englishEmbedding,embeddingTrain.bengaliEmbedding)
    trainEmbeddedDataLoader=utils.data.DataLoader(trainEmbeddedDataset,batch_size=64,shuffle=True)

    valEmbeddedDataset=utils.data.TensorDataset(embeddingVal.englishEmbedding,embeddingVal.bengaliEmbedding)
    valEmbeddedDataLoader=utils.data.DataLoader(valEmbeddedDataset,batch_size=64)

    testEmbeddedDataset=utils.data.TensorDataset(embeddingTest.englishEmbedding,embeddingTest.bengaliEmbedding)
    testEmbeddedDataset=utils.data.DataLoader(testEmbeddedDataset,batch_size=64)

    '''create an object of the encoder-decoder architecture with the best configuration for attention based model'''
    '''actual best config - ATT_YES_EP_10_CELL_LSTM_EMB_64_ENC_3_DEC_3_FC_256_DRP_0.4_BS_128_BIDIREC_NO'''
    '''dummy best config  - ATT_YES_EP_10_CELL_GRU_EMB_16_ENC_3_DEC_1_FC_128_DRP_0.2_BS_64_BIDIREC_NO'''
    
    modelBestWithAttention=Model(vocabulary,trainEmbeddedDataLoader,valEmbeddedDataLoader,test=1,attention=1)
    modelBestWithAttention.createModelFramework(modelType="LSTM",embeddingSize=64,neruonsInFC=256,
                                                layersInEncoder=3,layersInDecoder=3,dropout=0.4,bidirectional="NO",
                                                learningRate=0.001,epochs=10,batchSize=128)
    
    '''call the function which calculates the accuracy and loss'''
    paramList=[modelBestWithAttention.framework,testEmbeddedDataset,d.test_dataframe,64,vocabulary.paddingIndex,vocabulary.endOfSequenceIndex,vocabulary.indexToCharDictForBengali]
    image=RunTestOnBestModel.testAndGivePredictions(paramList)
    '''plot the image to wandb'''
    wandb.login()
    wandb.init(project="Debasmita-DA6401-Assignment-3",name="Question 5 Attention Predictions")
    wandb.log({"Attention Predictions":wandb.Image(image)})
    wandb.finish()


if __name__ == "__main__":
    main()