In [75]:
import numpy as np
import pandas as pd
import os
import cv2
import pathlib
from torch.utils.data import DataLoader
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import optim
import random
from torch.autograd import Variable
import time
import numpy as np
import csv
import matplotlib.pyplot as plt
import time
from tqdm import tqdm

In [60]:
class DataProcessing():

    # Constructor for DataProcessing
    def __init__(self, DATAPATH, source_lang, target_lang,device,config):
        
        # Source language
        self.source_lang = source_lang

        # Target language
        self.target_lang = target_lang
        
        # Start of Word and its Integer representation
        self.SOW = '>'
        self.SOW_char2int = 0

        # End of Word and its Integer representation
        self.EOW = '<'
        self.EOW_char2int = 1

        # Padding and its Integer representation
        self.PAD = '.'
        self.PAD_char2int = 2

        # Unknown and its Integer representation
        self.UNK = '?'
        self.UNK_char2int = 3 

        self.device = device
        self.batch_size = config["batch_size"]
        
        # Get path of train,validation and test data
        self.trainPath = os.path.join(DATAPATH,self.target_lang,self.target_lang + "_train.csv")
        self.validationPath = os.path.join(DATAPATH,self.target_lang,self.target_lang + "_valid.csv")
        self.testPath = os.path.join(DATAPATH,self.target_lang,self.target_lang + "_test.csv")
        
        # Load train data and set the column names - [source,target]
        self.train = pd.read_csv(
            self.trainPath,
            sep=",",
            names=["source", "target"],
        )

        # Load validation data and set the column names - [source,target]
        self.val = pd.read_csv(
            self.validationPath,
            sep=",",
            names=["source", "target"],
        )

        # Load test data and set the column names - [source,target]
        self.test = pd.read_csv(
            self.testPath,
            sep=",",
            names=["source", "target"],
        )
        
        # Creates train data
        self.train_data = self.preprocess(self.train["source"].to_list(), self.train["target"].to_list())

        # Store encoder input,decoder input,decoder target,source vocabulary,target vocabulary
        self.trainEncodeInput,self.trainDecoderInput,self.trainDecoderTarget,self.source_vocab,self.target_vocab = self.train_data
        
        self.source_char2int,self.source_int2char = self.source_vocab
        self.target_char2int,self.target_int2char = self.target_vocab
        self.max_length = self.getMaxLength()
       
    def encode(self, source_words, target_words, source_chars, target_chars, source_char2int = None, target_char2int = None):
        '''
        Input - 1.source_words - list of all source words
                2.target_words - list of all target words
                3.source_chars - sorted list of all characters in source language
                4.target_chars - sourted list of all characters in target langauge
                5.source_char2int - Dictionary mappig of charcater to integer for source words
                6.target_char2int - Dictionary mappig of charcater to integer for target words
        ''' 

        # Generate source and target vocab pairs containing dictionary mapping of character to integer and integer to character for source and target words
        source_vocab, target_vocab = None, None
        if source_char2int == None and target_char2int == None:

            source_char2int = dict([(char, i + 4) for i, char in enumerate(source_chars)])
            target_char2int = dict([(char, i + 4) for i, char in enumerate(target_chars)])

            source_int2char = dict([(i + 4, char) for i, char in enumerate(source_chars)])
            target_int2char = dict([(i + 4, char) for i, char in enumerate(target_chars)])

            # Add SOW to dictionaries
            source_char2int[self.SOW] = self.SOW_char2int
            source_int2char[self.SOW_char2int] = self.SOW
            target_char2int[self.SOW] = self.SOW_char2int
            target_int2char[self.SOW_char2int] = self.SOW

            # Add EOW to dictionaries
            source_char2int[self.EOW] = self.EOW_char2int
            source_int2char[self.EOW_char2int] = self.EOW
            target_char2int[self.EOW] = self.EOW_char2int
            target_int2char[self.EOW_char2int] = self.EOW

            # Add PAD to dictionaries
            source_char2int[self.PAD] = self.PAD_char2int
            source_int2char[self.PAD_char2int] = self.PAD
            target_char2int[self.PAD] = self.PAD_char2int
            target_int2char[self.PAD_char2int] = self.PAD

            # Add UNK to dictionaries
            source_char2int[self.UNK] = self.UNK_char2int
            source_int2char[self.UNK_char2int] = self.UNK
            target_char2int[self.UNK] = self.UNK_char2int
            target_int2char[self.UNK_char2int] = self.UNK

            source_vocab = (source_char2int,source_int2char)
            target_vocab = (target_char2int,target_int2char)
        
        
        self.encoder_input_data = np.zeros((len(source_words), self.max_source_length,self.num_encoder_tokens), dtype="float32")
        self.decoder_input_data = np.zeros((len(source_words), self.max_target_length,self.num_decoder_tokens), dtype="float32")
        self.decoder_target_data = np.zeros((len(source_words), self.max_target_length,self.num_decoder_tokens), dtype="float32")
            
        
        if source_vocab != None and target_vocab != None:
            return (
                    self.encoder_input_data,
                    self.decoder_input_data,
                    self.decoder_target_data,
                    source_vocab,
                    target_vocab,
                )
        
        # Source and TargetVocab were not created in the function. 
        # This implies sourceCharToInt and targetCharToInt were not None. Hence the vocab info is already present and we don't return the two tuples.
        else:
            return self.encoder_input_data, self.decoder_input_data, self.decoder_target_data

    def preprocess(self, source , target):
       
        # Creating list of words used in source language and converting them into string
        self.source_words = []
        for src in source:
            self.source_words.append(str(src))
        
        # Creating list of words used in target language and converting them into string
        self.target_words = []
        for trg in target:
            self.target_words.append(str(trg))
        
        # Set used to store characters used in source language
        source_chars = set()
        
        # Set used to store characters used in target language
        target_chars = set()

        # Populate source_chars and target_chars
        for src, tgt in zip(self.source_words, self.target_words):
            for char in src:
                source_chars.add(char)

            for char in tgt:
                target_chars.add(char)

        
        # Total number of training samples
        self.number_of_train_samples = len(self.source_words)
        
        
        # Sort the characters used in source and target language       
        source_chars = sorted(list(source_chars))
        target_chars = sorted(list(target_chars))

        # Number of unique characters in source language + Start_of_word,end_of_word,padding and unknown token
        self.num_encoder_tokens = len(source_chars) + 4
        
        # Number of unique characters in target language + Start_of_word,end_of_word,padding and unknown token
        self.num_decoder_tokens = len(target_chars) + 4

        # Length of maximum word in source_words
        self.max_source_length = max([len(txt) for txt in self.source_words])

        # Length of maximum word in target_words
        self.max_target_length = max([len(txt) for txt in self.target_words])

        return self.encode(self.source_words, self.target_words, source_chars, target_chars)

    # Returns list of integer mapped to character
    def indexesFromWord(self,lang,word):
        indexes = []
        if lang == "source":
            for char in word:
                # If character is in dictionary,add it to the list ,else add unknown token
                if char in self.source_char2int:
                    indexes.append(self.source_char2int[char])
                else:
                    indexes.append(self.UNK_char2int)
        if lang == "target":
            for char in word:
                # If character is in dictionary,add it to the list ,else add unknown token
                if char in self.target_char2int:
                    indexes.append(self.target_char2int[char])
                else:
                    indexes.append(self.UNK_char2int)

        return indexes

    # Create tensor for word
    def tensorFromWord(self,lang, word):
        # Gets list of integer mapped to character
        indexes = self.indexesFromWord(lang, word)
        
        # Append EOW2Int 
        indexes.append(self.EOW_char2int)
        max_length = self.max_length
        
        # Add padding
        len_padding = max_length - len(indexes) + 1
        
        indexes.extend([self.PAD_char2int for i in range(len_padding)])
        
        return torch.tensor(indexes, dtype = torch.long, device = self.device).view(-1, 1)

    # Create tensor from pair 
    def tensorsFromPair(self,pairs):
        # Get the source and target word for a given pair and generate tensors for them
        source_tensor = self.tensorFromWord("source", pairs[0])
        target_tensor = self.tensorFromWord("target", pairs[1])

        return (source_tensor, target_tensor)
    
    # Create list of pairs conataining source_words and target_words for train data
    def createTrainPairs(self):
        pairs  = []
        source_words = self.source_words
        target_words = self.target_words

        for source_word,target_word in zip(source_words,target_words):
            pairs.append((source_word,target_word))
        return pairs
    
    # Create list of pairs conataining source_words and target_words for validation data
    def createValidationData(self):
        pairs = []
        source_words = []
        target_words = []
        for word in self.val["source"].to_list():
            source_words.append(word)

        for word in self.val["target"].to_list():
            target_words.append(word)
        
        for source_word,target_word in zip(source_words,target_words):
            pairs.append((source_word,target_word))

        return pairs
    
    # Create list of pairs conataining source_words and target_words for test data
    def createTestData(self):
        pairs = []
        source_words = []
        target_words = []
        for word in self.test["source"].to_list():
            source_words.append(word)

        for word in self.test["target"].to_list():
            target_words.append(word)
        
        for source_word,target_word in zip(source_words,target_words):
            pairs.append((source_word,target_word))

        return pairs
    
    # Returns the maximum length of word from source_lang and target_lang
    def getMaxLength(self):
        
        source_words = []
        target_words = []
        for word in self.val["source"].to_list():
            source_words.append(word)

        for word in self.val["target"].to_list():
            target_words.append(word)
        
        val_max_source_length = max([len(txt) for txt in source_words])
        val_max_target_length = max([len(txt) for txt in target_words])
        
        source_words = []
        target_words = []
        for word in self.test["source"].to_list():
            source_words.append(word)

        for word in self.test["target"].to_list():
            target_words.append(word)
        
        test_max_source_length = max([len(txt) for txt in source_words])
        test_max_target_length = max([len(txt) for txt in target_words])

        self.maxSourceLength = max([self.max_source_length,val_max_source_length,test_max_source_length])
        self.maxTargetLength = max([self.max_target_length,val_max_target_length,test_max_target_length])

        return max(self.maxSourceLength,self.maxTargetLength)
    
    # Returns loader for validation data
    def getValLoader(self):
        
        validationPairs = self.createValidationData()
        validation_pairs = []
        for pair in validationPairs:
            validation_pairs.append(self.tensorsFromPair(pair))
        
        val_Loader = DataLoader(validation_pairs,batch_size = self.batch_size,shuffle = True)
        return val_Loader
    
    # Returs loader for test data
    def getTestLoader(self):
        testPairs = self.createTestData()

        test_pairs = []
        for pair in testPairs:
            test_pairs.append(self.tensorsFromPair(pair))
        
        test_Loader = DataLoader(test_pairs,batch_size = self.batch_size,shuffle = True)

        return test_Loader
    
    

In [61]:
class Encoder(nn.Module):
    
    # Encoder Destructor
    def __init__(self, input_size,config):
        super(Encoder, self).__init__()

        # Store parameters in class varaibles
        self.hidden_size = config["hidden_size"]
        self.embedding_size = config["embedding_size"]
        self.cell_type = config["cell_type"] 
        self.numLayers = config["numLayers"]
        self.drop_out = config["drop_out"]
        self.bidirectional = config["bidirectional"]
        self.batch_size = config["batch_size"]
        
        # input_size - contains the number of encoder tokens which is input to Embedding
        # hidden_size - size of each embedding vector
        # Create an Embedding for the Input 
        # Each character will have an embedding of size = embedding_size
        self.embedding = nn.Embedding(input_size, self.embedding_size)
        self.dropout = nn.Dropout(self.drop_out)
        
        # the cell_type - GRU
        if self.cell_type == "gru":
            ''' Input to GRU -  1.number of expected features in x - embedded input 
                                2.number of features in hidden state - hidden_size
                                3.number of layers (stacking GRUs together) '''
            self.gru = nn.GRU(self.embedding_size, self.hidden_size,num_layers = self.numLayers,dropout = self.drop_out,bidirectional = self.bidirectional)
            self.rnnLayer = self.gru
        
        # the cell_type - RNN
        if self.cell_type == "rnn":
            ''' Input to RNN -  1.number of expected features in x
                                2.number of features in hidden state
                                3.number of layers (stacking RNNs together) '''
            
            self.rnn = nn.RNN(self.embedding_size,self.hidden_size,num_layers = self.numLayers,dropout = self.drop_out,bidirectional = self.bidirectional)
            self.rnnLayer = self.rnn
        
        # the cell_type - LSTM
        if self.cell_type == "lstm":
            ''' Input to LSTM - 1.number of expected features in x
                                2.number of features in hidden state
                                3.number of layers (stacking LSTMs together) '''
            
            self.lstm = nn.LSTM(self.embedding_size,self.hidden_size,num_layers = self.numLayers,dropout = self.drop_out,bidirectional = self.bidirectional)
            self.rnnLayer = self.lstm

    # Encoder forward pass
    def forward(self, input, hidden,cell_state = None):
        '''Input -> hidden      - initial hidden state for each element in the input sequence
                    cell_hidden - the initial cell state for each element in the input sequence
        '''
       
        # Creates a embedded tensor by passing the input to the embedding layer and resizing the output to (1,batch_size,-1)
        embedded = self.dropout(self.embedding(input).view(1, self.batch_size, -1))
        
        # Pass this embedded input to the GRU/LSTM/RNN model
        output = embedded
          
        '''Output     -     1.Output features from the last layer
                            2.final hidden state for each element which is passed to decoder as a context vector'''
        output, hidden = self.rnnLayer(output, hidden)
        return output, hidden
        
        
    # Initailizes initial hidden layer for encoder
    def initHidden(self,device,numLayers):
        if self.bidirectional:
            return torch.zeros(numLayers * 2, self.batch_size, self.hidden_size, device=device)
        else:
            return torch.zeros(numLayers, self.batch_size, self.hidden_size, device=device)

class Decoder(nn.Module):

    # Decoder Constructor
    def __init__(self,output_size,config,data): 
        super(Decoder, self).__init__()

        
        # Store parameters in class varaibles
        self.numLayers = config["numLayers"]
        self.cell_type = config["cell_type"]
        self.hidden_size = config["hidden_size"]
        self.embedding_size = config["embedding_size"]
        # Create embedding for input
        self.embedding = nn.Embedding(output_size, self.embedding_size)
        self.drop_out = config["drop_out"]       
        self.bidirectional = config["bidirectional"]
        self.batch_size = config["batch_size"]
        self.dropout = nn.Dropout(self.drop_out)
        
        if self.cell_type == "gru":
            self.gru = nn.GRU(self.embedding_size, self.hidden_size,num_layers = self.numLayers,dropout = self.drop_out,bidirectional = self.bidirectional)
            self.rnnLayer = self.gru

        if self.cell_type == "rnn":
            self.rnn = nn.RNN(self.embedding_size,self.hidden_size,self.numLayers,dropout = self.drop_out,bidirectional = self.bidirectional)
            self.rnnLayer = self.rnn
        
        if self.cell_type == "lstm":
            self.lstm = nn.LSTM(self.embedding_size,self.hidden_size,self.numLayers,dropout = self.drop_out,bidirectional = self.bidirectional)
            self.rnnLayer = self.lstm
       
        # Creating a dense layer
        if self.bidirectional:
            self.out = nn.Linear(self.hidden_size * 2 ,output_size)
        else:
            self.out = nn.Linear(self.hidden_size, output_size)
        
        # Softmax function as an output function
        self.softmax = nn.LogSoftmax(dim = 1)

    def forward(self, input, hidden,cell_state = None):

        # Create an embedding for input and resize it to (1,batch_size,-1)
        # The output of embedding layer is passed as an input to decoder
        output = self.dropout(self.embedding(input).view(1, self.batch_size, -1))

        # Applying ReLU activation function
        output = F.relu(output)

        # Pass output and previous hidden state to model RNN/LSTM/GRU
        output, hidden = self.rnnLayer(output, hidden)
        
        # apply softmax function as an output function
        output = self.softmax(self.out(output[0]))
        
        return output, hidden
 

In [62]:
teacher_forcing_ratio = 0.5

In [89]:
'''
    Input ->    1.source_tensor - a tensor for given source word containing character indexes
                2.target_tensor - a tensor for given target word containing character indexes
                3.encoder - object of Encdoer class
                4.decoder - object of Decoder class
                5.encoder_optimizer - optimizer used for encoder
                6.decoder_optimizer - optimizer used for decoder
                7.criterion - loss function
                8.max_length - maximum length of source word
                9.device - CUDA or CPU
'''
def trainforOneEpoch(config,data,source_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion):
    
    device = config["device"]
    max_length = data.max_length
    batch_size = config["batch_size"]
    attention = config["attention"]
    hidden_size = config["hidden_size"]
    
    
    # Initailize initial hidden layer for encoder
    encoder_hidden = encoder.initHidden(device,config["numLayers"])
    
    if config["cell_type"] == "lstm":
        encoder_cell_state = encoder.initHidden(device,config["numLayers"])
        encoder_hidden = (encoder_hidden,encoder_cell_state)
    
    # Empty the gradients for encoder and decoder
    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    source_tensor = source_tensor.squeeze()
    target_tensor = target_tensor.squeeze()
    
    # Length of source and target tensor
    source_length = source_tensor.size(0)
    target_length = target_tensor.size(0)
    
    
    # Stores all encoder outputs for each character in source word
    if attention:
        encoder_outputs = torch.zeros(max_length + 1,batch_size, hidden_size, device = device)

    # Initialize loss to ZERO
    loss = 0
    
    # encoder encodes each character index in source_word
    for ei in range(source_length):
        encoder_output, encoder_hidden = encoder(source_tensor[ei], encoder_hidden)
        if attention:
            encoder_outputs[ei] = encoder_output[0]
    

    # Initialize decoder input with start of word token
    decoder_input = torch.tensor([[data.SOW_char2int] * batch_size],device = device)

    # initial hidden layer for decoder will be final hidden layer from the encoder
    decoder_hidden = encoder_hidden
    
    use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False
    total_correct_words = 0  # Initialize total correct words //added

    if use_teacher_forcing:
        # Teacher forcing: Feed the target as the next input
        for di in range(target_length):
            if attention:  
                decoder_output, decoder_hidden,decoder_attention = decoder(decoder_input, decoder_hidden,encoder_outputs.reshape(batch_size,max_length + 1,hidden_size))
            else:
                decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)

            loss += criterion(decoder_output, target_tensor[di])
            decoder_input = target_tensor[di]  # Teacher forcing

    else:
        # Without teacher forcing: use its own predictions as the next input
        for di in range(target_length):
            if attention:
                decoder_output, decoder_hidden,decoder_attention = decoder(decoder_input, decoder_hidden,encoder_outputs.reshape(batch_size,max_length + 1,hidden_size))
            else:
                decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
            
            # returns top value and index from the decoder output
            topv, topi = decoder_output.topk(1)
            
            # Squeeze all the dimensions that are 1 and returns a new tensor detatched from the current history graph
            decoder_input = topi.squeeze().detach()            

            # Compute loss
            loss += criterion(decoder_output, target_tensor[di])
             # Compute number of correct words
            if torch.equal(decoder_input, target_tensor[di]): #added
                total_correct_words += 1

    # Backpropagation
    loss.backward()

    # Update parameters
    encoder_optimizer.step()
    decoder_optimizer.step()

    # return the loss
    return loss.item() / target_length , total_correct_words #added

'''
    Input - 1.object of Encoder class
            2.object of Decoder class
            3.n_iters - number of epochs
            4.print_every - prints every given milliseconds
            5.plot_every - plots every given milliseconds
            6.learning rate
'''
def trainIters(config,total_batches,loader,data,encoder,decoder,wandbapply):

    if wandbapply:
        wandb.init(
            project=config["wandb_project"]
        )

    epochs = config["epoch"]
    learning_rate = config["learning_rate"]
    criterion = nn.CrossEntropyLoss()
    
    # Set optimizers for encoder and decoder
    encoder_optimizer = optim.NAdam(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.NAdam(decoder.parameters(), lr=learning_rate)
    
    
    for epoch in range(epochs):
        epoch_loss = 0
        batch_no = 1
        # Train for each batch
        epoch_total_correct_words = 0  # Initialize total correct words for the epoch added
        #tqdm(loader, desc=f"Epoch {epoch+1}/{epochs}")
        for batchx,batchy in tqdm(loader, desc=f"Epoch {epoch+1}/{epochs}"):
               
            batchx = batchx.transpose(0,1)
            batchy = batchy.transpose(0,1)
            batch_loss,batch_correct_words = trainforOneEpoch(config = config,
                                            data = data,
                                            source_tensor = batchx, 
                                            target_tensor = batchy, 
                                            encoder = encoder,
                                            decoder = decoder,
                                            encoder_optimizer = encoder_optimizer, 
                                            decoder_optimizer = decoder_optimizer, 
                                            criterion = criterion)
            
            epoch_loss += batch_loss
            epoch_total_correct_words += batch_correct_words  # Accumulate total correct words added
            batch_no+=1
#             if batch_no % 100 == 0:
#                 print("epoch:" + str(epoch + 1) + " / " + str(epochs) + "    batch:" + str(batch_no) + " / " + str(total_batches))
        
        val_loader = data.getValLoader()
        
        if epoch == (epochs - 1):
            
            # Compute validation accuracy
            validation_loss ,validation_accuracy = evaluate(config=config,
                loader=val_loader,
                data=data,
                encoder=encoder,
                decoder=decoder,
                training_completed=True,
                test = False
            )
        else:
            # Compute validation accuracy
            validation_loss ,validation_accuracy = evaluate(config=config,
                loader=val_loader,
                data=data,
                encoder=encoder,
                decoder=decoder,
                training_completed=False,
                test = False
            )
        train_loss = epoch_loss / total_batches
        train_accuracy = (epoch_total_correct_words / len(loader.dataset)) * 100  # Calculate training accuracy added
       
        '''print("epoch:{epoch}, train loss:{train_l}, validation loss:{validation_l}, validation accuracy:{validation_ac}".\
                  format(epoch = epoch + 1,train_l = train_loss,validation_l = validation_loss,validation_ac = validation_accuracy))'''
        
        print("epoch:{epoch}, train loss:{train_l}, train accuracy:{train_ac}, validation loss:{validation_l}, validation accuracy:{validation_ac}". \
              format(epoch=epoch + 1, train_l=train_loss, train_ac=train_accuracy, validation_l=validation_loss,
                     validation_ac=validation_accuracy))
        if wandbapply:
            wandb.log({'train loss':train_loss,'validation loss':validation_loss, 'validation accuracy':validation_accuracy})
    '''
    Code for using test data
    '''
    print("epochs completed")
    test_loader = data.getTestLoader()
        
    config["batch_size"] = 1
    test_loss ,test_accuracy = evaluate(config=config,
             loader=test_loader,
             data=data,
             encoder=encoder,
             decoder=decoder,
             training_completed=False,
             test = True
        )
    print("Test Accuracy:",test_accuracy)
    # wandb.init(
    #         project=config["wandb_project"]
    #     )
    # wandb.log({'test loss':test_loss,'test accuracy':test_accuracy})


In [86]:
def evaluate(config,data, loader, encoder, decoder,training_completed,test) :

        loss = 0
        totalCorrectWords = 0
        batchNumber = 1
        batch_size = config["batch_size"]
        
        totalWords = len(loader.sampler)
        totalBatches = len(loader.sampler) // batch_size

        # Loss Function
        criterion = nn.CrossEntropyLoss()

        for sourceTensor, targetTensor in loader :
            batchLoss, correctWords,attentions = evaluateOneBatch(config,data,sourceTensor, targetTensor, encoder, decoder, criterion,test)

            loss += batchLoss
            totalCorrectWords += correctWords

        # If training is completed,then dispay heatmaps
        if training_completed == True :
            if attentions is not None:
                volume  = attentions.numpy()

                for point in range(10):
                    heatMap = np.zeros((data.max_length + 1,data.max_length + 1))

                    for i in range(data.max_length + 1):
                        for k in range(data.max_length + 1):
                            heatMap[i][k] = volume[i][point][k]


                    # plotHeatMap(heatMap)

        return (loss / totalBatches), (totalCorrectWords / totalWords) * 100


In [65]:
def evaluateOneBatch(config,data, sourceTensorBatch, targetTensorBatch, encoder, decoder, criterion,test) :

        loss = 0
        correctWords = 0

        batchSize = data.batch_size
        device = config["device"]
        maxLengthWord = data.max_length
        cell_type = config["cell_type"]
        attention = config["attention"]
        hidden_size = config["hidden_size"]

        sourceTensor = Variable(sourceTensorBatch.transpose(0, 1))
        targetTensor = Variable(targetTensorBatch.transpose(0, 1))
        
        # Get source length
        sourceTensorLength = sourceTensor.size()[0]
        targetTensorLength = targetTensor.size()[0]

        predictedBatchOutput = torch.zeros(targetTensorLength, batchSize, device = device)

        # Initialize initial hidden state of encoder
        encoderHidden = encoder.initHidden(device = device,numLayers = config["numLayers"])

        if cell_type == "lstm":
            encoderCell = encoder.initHidden(device = device,numLayers = config["numLayers"])
            encoderHidden = (encoderHidden, encoderCell)

        if attention:
            encoderOutputs = torch.zeros(maxLengthWord + 1, batchSize, hidden_size, device = device)

        for ei in range(sourceTensorLength):
            encoderOutput, encoderHidden = encoder(sourceTensor[ei], encoderHidden)

            if attention :
                encoderOutputs[ei] = encoderOutput[0]

        # Initialize input to decoder with start of word token
        decoderInput = torch.tensor([[data.SOW_char2int] * batchSize], device = device)

        # initial hidden state for decoder will be final hidden state of encoder
        decoderHidden = encoderHidden

        if attention :
            decoderAttentions = torch.zeros(maxLengthWord + 1,batchSize, maxLengthWord + 1)
        
        for di in range(targetTensorLength):
            if attention :
                # Pass the decoderInput, decoderHidden and encoderOutputs to the decoder
                decoderOutput, decoderHidden, decoderAttention = decoder(decoderInput, decoderHidden, encoderOutputs.reshape(batchSize,maxLengthWord + 1,hidden_size))
                decoderAttentions[di] = decoderAttention.data
            else : 
                decoderOutput, decoderHidden = decoder(decoderInput, decoderHidden)
            
            loss += criterion(decoderOutput, targetTensor[di].squeeze())
            
            topv, topi = decoderOutput.data.topk(1)
            decoderInput = torch.cat(tuple(topi))
            predictedBatchOutput[di] = torch.cat(tuple(topi))
#             if torch.equal(topi.squeeze().detach(), targetTensor[di].squeeze()): #added
#                 correctWords += 1
    
        predictedBatchOutput = predictedBatchOutput.transpose(0,1)

        ignore = [data.SOW_char2int, data.EOW_char2int,data.PAD_char2int]
        
        predicted_list = []
        target_list = []
        input_list = []
        
        for di in range(predictedBatchOutput.size()[0]):

            predicted = [letter.item() for letter in predictedBatchOutput[di] if letter not in ignore]
            actual = [letter.item() for letter in targetTensorBatch[di] if letter not in ignore]
            inputText = [letter.item() for letter in sourceTensorBatch[di] if letter not in ignore]

            predictedChars = [data.target_int2char[char] for char in predicted]
            actualChars = [data.target_int2char[char] for char in actual]
            inputChars = [data.source_int2char[char] for char in inputText]

            predictedWord = "".join([str(i) for i in predictedChars])
            actualWord = "".join([str(i) for i in actualChars])
            inputWord = "".join([str(i) for i in inputChars])

            predicted_list.append(predictedWord)
            target_list.append(actualWord)
            input_list.append(inputWord)

            if predicted == actual:
                correctWords += 1
        
        if test:
            writeToCSV(predicted_list,target_list,input_list)
                
        if attention:
            return loss.item() / len(sourceTensorBatch), correctWords,decoderAttentions
        
        return loss.item() / len(sourceTensorBatch), correctWords,None


In [66]:
'''
Run this file if you want to run the code without WANDB
'''

def trainForConfigs(config,add_wandb):
    # Load and pre-process data
    device = config["device"]
    data = DataProcessing(DATAPATH = '/kaggle/input/aksharantar-sampled/aksharantar_sampled', source_lang = config["source_lang"], target_lang = config["target_lang"],device = device,config = config)
    
    config["maxLength"] = data.getMaxLength()
    batch_size = config["batch_size"]
    
    # Create encoder with input size = number of characters in source langauge and specified embedding size
    encoder = Encoder(data.num_encoder_tokens,config).to(device)
    
    # Create encoder with output size = number of characters in target langauge and specified embedding size
    decoder = Decoder(data.num_decoder_tokens,config,data).to(device)
    

    trainLoader,total_batches = getTrainLoader(data,batch_size)    
    
    # Train the model and compute loss and accuracy
    trainIters(config = config,loader=trainLoader,total_batches=total_batches,data = data,encoder = encoder,decoder = decoder,wandbapply = add_wandb)
    
# Returns loader for train data and total number of batches in training data
def getTrainLoader(data,batch_size):
        trainPairs = data.createTrainPairs()
        training_pairs = []
        for pair in trainPairs:
            training_pairs.append(data.tensorsFromPair(pair))
        
        trainLoader = DataLoader(training_pairs,batch_size = batch_size,shuffle = True)
        total_batches = len(training_pairs) // batch_size
   
        return trainLoader,total_batches



In [90]:
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    config = {
        "wandb_project": "DL Assignment 3-1",
        "wandb_entity": "cs22m019",
        "hidden_size" : 512,
        "source_lang" : 'en',
        "target_lang" : 'hin',
        "cell_type"   : "gru",
        "numLayers" : 2,
        "drop_out"    : 0.3, 
        "embedding_size" : 256,
        "bidirectional" : False,
        "batch_size" : 32,
        "attention" : False,
        "epoch" : 5,
        "device" : device,
        "learning_rate" : 0.001 #not good for 0.01
    }

    # Update parameters obtained from command line
#     update_parameters(config)
    
    startime = time.time()
    trainForConfigs(config,add_wandb=False)
    endTime = (time.time() - startime)
    print(endTime / 60)

Epoch 1/5: 100%|██████████| 1600/1600 [01:38<00:00, 16.17it/s]


epoch:1, train loss:0.5987209287727332, train accuracy:18.32421875, validation loss:0.3879552190192044, validation accuracy:17.9443359375


Epoch 2/5: 100%|██████████| 1600/1600 [01:38<00:00, 16.19it/s]


epoch:2, train loss:0.3873124825733679, train accuracy:18.2421875, validation loss:0.3116851083468646, validation accuracy:21.240234375


Epoch 3/5: 100%|██████████| 1600/1600 [01:39<00:00, 16.04it/s]


epoch:3, train loss:0.3534393382072445, train accuracy:18.49609375, validation loss:0.30491670151241124, validation accuracy:21.0205078125


Epoch 4/5: 100%|██████████| 1600/1600 [01:39<00:00, 16.12it/s]


epoch:4, train loss:0.33201204475981266, train accuracy:18.203125, validation loss:0.3018288140883669, validation accuracy:25.0


Epoch 5/5: 100%|██████████| 1600/1600 [01:39<00:00, 16.12it/s]


epoch:5, train loss:0.3176198307949081, train accuracy:17.873046875, validation loss:0.31070759252179414, validation accuracy:24.951171875
epochs completed


NameError: name 'writeToCSV' is not defined