In [14]:
# imports
!pip install wandb
import pandas as pd
import math
import torch
import torchvision
import torch.nn.functional as F  # Parameterless functions, like (some) activation functions
import torchvision.datasets as datasets  # Standard datasets
import torchvision.transforms as transforms  # Transformations we can perform on our dataset for augmentation
from torch import optim  # For optimizers like SGD, Adam, etc.
from torch import nn  # All neural network modules
from torch.utils.data import (
    DataLoader, random_split
)  # Gives easier dataset managment by creating mini batches etc.
from tqdm import tqdm  # For nice progress bar!

from torchvision.datasets import ImageFolder
import os
import random
import matplotlib.pyplot as plt
import numpy as np
import pathlib
import pandas as pd
import numpy as np



enable_gpu= torch.cuda.is_available()
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

print(device)

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
cuda


In [15]:

token_mapping = {
    'start': 0,
    'end': 1,
    'lang_1': 'eng',
    'lang_2': 'hin',
    'UNK': 3,
    'Padding_token': 4
}



In [16]:
def readData(dir):
    """
    Reads the data from a CSV file located at the specified directory and returns it as a Pandas DataFrame.
    """
    data = pd.read_csv(dir, sep=",", names=['input', 'output'])
    return data

def createPairs(input_list, output_list):
    """
    Takes two lists of inputs and outputs and returns a list of pairs, where each pair is a list containing an input and its corresponding output.
    """
    pairs = [[input_list[i], output_list[i]] for i in range(len(input_list))]
    return pairs

def addWordsToLang(lang, words):
    """
    Takes a Lang object and a list of words and adds each word to the Lang's vocabulary.
    """
    for word in words:
        lang.addAllCharactersFromWord(word)

def prepareData(dir, lang_1, lang_2):
    """
    Reads the data from a CSV file located at the specified directory, creates a list of pairs of inputs and outputs,
    and creates and populates two Lang objects with the vocabulary of the inputs and outputs. Returns the Lang objects,
    the list of pairs, and the maximum length of the inputs and outputs.
    """
    data = readData(dir)
    input_list = data['input'].to_list()
    output_list = data['output'].to_list()
    pairs = createPairs(input_list, output_list)
    input_lang = dictionary(token_mapping['lang_1'])
    output_lang = dictionary(token_mapping['lang_2'])
    addWordsToLang(input_lang, input_list)
    addWordsToLang(output_lang, output_list)
    max_input_length = max([len(txt) for txt in input_list])
    max_output_length = max([len(txt) for txt in output_list])
    return input_lang, output_lang, pairs, max_input_length, max_output_length


In [17]:
class dictionary:
    def __init__(self, name):
        self.name = name
        chars = ['<', '>', '?', '.']
        self.char2count = {char: 0 for char in chars}
        self.char2index = {char: index for index, char in enumerate(chars)}
        self.n_chars = len(chars)
        self.index2char = {index: char for index, char in enumerate(chars)}


    def addAllCharactersFromWord(self, word):
        count = 0
        while True:
            self.addChar(word[count])
            count += 1
            if count == len(word):
                break

    def printValues(self):
        print("char2index:")
        for char, index in self.char2index.items():
           print(f"  {char}: {index}")
    
        print("char2count:")
        for char, count in self.char2count.items():
          print(f"  {char}: {count}")
    
        print("index2char:")
        for index, char in self.index2char.items():
          print(f"  {index}: {char}")
   
    def addChar(self, char):
       if char not in self.char2index:
          self.index2char[self.n_chars] = char
          self.char2index[char] = self.n_chars
          self.char2count[char] = 0
          self.n_chars += 1
       self.char2count[char] += 1


In [18]:

class encodeText(nn.Module):
    def __init__(self, input_size, configuration):

      super(encodeText, self).__init__()
      self.hidden_size = configuration['hidden_size']
      self.cell_type = configuration["cell_type"]
      self.batch_size = configuration['batch_size']
      self.dropout = nn.Dropout(configuration['drop_out']) 
      self.embedding_size = configuration['embedding_size']
      self.bidirectional = configuration['bi_directional']
      self.embedding = nn.Embedding(input_size, self.embedding_size)
      

    # Initialize the recurrent unit layer.
      if self.cell_type == 'LSTM':
         self.cell_layer = nn.LSTM(
             self.embedding_size,
             self.hidden_size,
             num_layers=configuration["num_layers_encoder"],
             dropout=configuration['drop_out'],
             bidirectional=configuration['bi_directional']
        )
      elif self.cell_type == 'GRU':
         self.cell_layer = nn.GRU(
             self.embedding_size,
             self.hidden_size,
             num_layers=configuration["num_layers_encoder"],
             dropout=configuration['drop_out'],
             bidirectional=configuration['bi_directional']
        )
      else: 
         self.cell_layer = nn.RNN(
             self.embedding_size,
             self.hidden_size,
             num_layers=configuration["num_layers_encoder"],
             dropout=configuration['drop_out'],
             bidirectional=configuration['bi_directional']
        )

    def forward(self, input, hidden):
      
        weight = self.embedding.weight.to(input.device)
        embedded = F.embedding(input, weight)
        embedded = self.dropout(embedded.view(1, self.batch_size, -1))
        output, hidden = self.cell_layer(embedded, hidden)
        return output, hidden

 

    def initializeHiddenState(self, num_layers):
        enable_gpu=torch.cuda.is_available()
        num_directions = 2 if self.bidirectional else 1
        hidden_size = self.hidden_size // num_directions
        res = torch.zeros(num_layers * num_directions, self.batch_size, hidden_size)
        if enable_gpu:
           res = res.cuda()
        return res


In [19]:
class decodeText(nn.Module):
    def __init__(self, configuration, output_size):

        super(decodeText, self).__init__()
        # Save the configuration parameters.
        self.cell_type = configuration["cell_type"]
        self.hidden_size = configuration['hidden_size']
        self.batch_size = configuration['batch_size']
        self.num_layers = configuration['num_layers_decoder']
        self.embedding_size = configuration['embedding_size']
        self.bidirectional = configuration['bi_directional']
        self.embedding = nn.Embedding(output_size, self.embedding_size)
        self.dropout = nn.Dropout(configuration['drop_out'])

        # Initialize the recurrent unit layer.
        if self.cell_type == 'RNN':
            self.cell_layer = nn.RNN(
                self.embedding_size,
                self.hidden_size,
                num_layers=self.num_layers,
                dropout=configuration['drop_out'],
                bidirectional=self.bidirectional
            )
        elif self.cell_type == 'GRU':
            self.cell_layer = nn.GRU(
                self.embedding_size,
                self.hidden_size,
                num_layers=self.num_layers,
                dropout=configuration['drop_out'],
                bidirectional=self.bidirectional
            )
        elif self.cell_type == 'LSTM':
            self.cell_layer = nn.LSTM(
                self.embedding_size,
                self.hidden_size,
                num_layers=self.num_layers,
                dropout=configuration['drop_out'],
                bidirectional=self.bidirectional
            )
            
        self.out = nn.Linear(self.hidden_size, output_size)
        if self.bidirectional:
            self.out = nn.Linear(self.hidden_size * 2, output_size)

        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hidden):
        output = self.dropout(self.embedding(input).view(1, self.batch_size, -1))
        output = F.relu(output)
        output, hidden = self.cell_layer(output, hidden)
        output = self.softmax(self.out(output[0]))
        return output, hidden

    def initializeHiddenState(self):

        num_directions = 2 if self.bidirectional else 1
        res = torch.zeros(self.num_layers * num_directions, self.batch_size, self.hidden_size)
        if enable_gpu:
            return res.cuda()
        return res


In [20]:


def indexesFromWord(lang, word):
    """
    Converts a word to a list of indexes.

    Args:
        lang: The language model.
        word: The word to convert.

    Returns:
        A list of indexes.
    """
    # result=[]
    # index =0 
    # temp =len(word)
    # while(index<temp):
    #   #result.append(lang.char2index[word[index]]) if word[index] in lang.char2index.keys() else result.append(token_mapping['UNK'])
    #   if word[index] in lang.char2index.keys():
    #     result.append(lang.char2index[word[index]])
    #   else:
    #     z= token_mapping['UNK']
    #     result.append(z)
    #   index = index+1
    # return result
    return [lang.char2index[char] for char in word]


def variableFromSentence(lang, sentence, max_length):
    """
    Converts a sentence to a variable.

    Args:
        lang: The language model.
        sentence: The sentence to convert.
        max_length: The maximum length of the sentence.

    Returns:
        A variable.
    """

    indexes = indexesFromWord(lang, sentence)
    indexes.append(token_mapping['end'])
    indexes.extend([token_mapping['Padding_token']] * (max_length - len(indexes)))
    if(enable_gpu):
      return torch.LongTensor(indexes).cuda()
      
    return torch.LongTensor(indexes)


def variablesFromPairs(input_lang, output_lang, pairs, max_length):
    """
    Converts a list of pairs to a list of variables.

    Args:
        input_lang: The input language model.
        output_lang: The output language model.
        pairs: The list of pairs to convert.
        max_length: The maximum length of the sentences.

    Returns:
        A list of variables.
    """

    res = []
    for pair in pairs:
        input_variable = variableFromSentence(input_lang, pair[0], max_length)
        output_variable = variableFromSentence(output_lang, pair[1], max_length)
        res.append((input_variable, output_variable))
    return res



In [21]:
def modelEvaluation(encoder, decoder, loader, configuration, criterion, max_length, output_lang):
    """
    modelEvaluations the performance of the encoder-decoder model on the given data.

    Args:
        encoder: The encoder model.
        decoder: The decoder model.
        loader: The data loader.
        configuration: The configuration parameters.
        criterion: The loss function.
        max_length: The maximum length of a sequence.
        output_lang: The output language.

    Returns:
        The accuracy and loss of the model.
    """
    
    batch_size = configuration['batch_size']
    loss ,total,correct= 0,0,0
    enable_gpu = torch.cuda.is_available()
    

    for batch_input, batch_output in loader:
        batch_loss = 0
        numLayersEncoder = configuration['num_layers_encoder']
        encoder_hidden = encoder.initializeHiddenState(numLayersEncoder)
        if configuration["cell_type"] == "LSTM":
            encoder_cell_state = encoder.initializeHiddenState(numLayersEncoder)
            encoder_hidden = (encoder_hidden, encoder_cell_state)
        # if "cell_type" in configuration and configuration["cell_type"] == "LSTM":
        #     encoder_hidden = (encoder_hidden, encoder.initializeHiddenState(configuration['num_layers_encoder'])[1])


        input_variable = batch_input.transpose(0, 1)
        output_variable = batch_output.transpose(0, 1)

        input_length = input_variable.size(0)
        target_length = output_variable.size(0)

        output = torch.LongTensor(target_length, batch_size)

        encoder_outputs = torch.zeros(max_length, batch_size, encoder.hidden_size)
        if enable_gpu:
            encoder_outputs = encoder_outputs.cuda()

        for i in range(input_length):
            encoder_output, encoder_hidden = encoder(input_variable[i], encoder_hidden)
            encoder_outputs[i] = encoder_output

        decoder_input = torch.LongTensor([token_mapping['start']] * batch_size)
        if enable_gpu:
            decoder_input = decoder_input.cuda()

        decoder_hidden = encoder_hidden
        for j in range(target_length):
            decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
            temp1 =criterion(decoder_output, output_variable[j].squeeze())
            batch_loss = batch_loss + temp1
            _ , topi = decoder_output.data.topk(1)
            decoder_input = torch.cat((topi.squeeze(),))

            output[j] = topi.squeeze()

        output = output.transpose(0, 1)

        for k in range(output.size(0)):
            ignore = [token_mapping['start'], token_mapping['end'], token_mapping['Padding_token']]
            sent = [output_lang.index2char[letter.item()] for letter in output[k] if letter.item() not in ignore]
            y = [output_lang.index2char[letter.item()] for letter in batch_output[k] if letter.item() not in ignore]
            # print(sent)
            # print("prediciton")
            # print(y)
            if sent != y:
                correct =correct
            else:
              correct = correct+1

            total=total+1
            # correct = correct + 1 if sent == 'y' else correct
            # total = total + 1


        accuracy = (correct/total) * 100
        temp1= batch_loss.item() / target_length
        loss = loss + temp1
    return accuracy, loss


In [22]:
def train(input_tensor, output_tensor, encoder, decoder,train_loader,val_loader, encoder_optimizer, decoder_optimizer, criterion, configuration, max_length,batch_size):

    #batch_size = configuration['batch_size']
    enable_gpu = torch.cuda.is_available()
    
    numLayersEncoder = configuration['num_layers_encoder']
    encoder_hidden = encoder.initializeHiddenState(numLayersEncoder)
    if configuration["cell_type"] == "LSTM":
        encoder_cell_state = encoder.initializeHiddenState(numLayersEncoder)
        encoder_hidden = (encoder_hidden, encoder_cell_state)

    input_tensor = input_tensor.transpose(0, 1)
    output_tensor = output_tensor.transpose(0, 1)

    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    encoder_outputs = torch.zeros(max_length, batch_size, encoder.hidden_size)
    if enable_gpu:
        encoder_outputs = encoder_outputs.cuda()

    loss = 0
    input_length = input_tensor.size(0)
    output_length = output_tensor.size(0)

    encoder_hidden = encodeInputSequence(encoder, input_tensor, encoder_hidden, input_length)

    decoder_input = torch.LongTensor([token_mapping['start']] * batch_size)
    if enable_gpu:
        decoder_input = decoder_input.cuda()
    teacher_forcing_ratio = configuration['teacher_forcing_ratio']
    decoder_hidden = encoder_hidden
    use_teacher_forcing = random.random() < teacher_forcing_ratio

    if use_teacher_forcing:
        loss = decodeWithTeacherForcing(decoder, decoder_input, decoder_hidden, output_tensor, criterion, output_length, loss)
    else:
        loss = decodeWithoutTeacherForcing(decoder, decoder_input, decoder_hidden, output_tensor, criterion, output_length, loss, enable_gpu)

    loss /= output_length
    loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.item()

def encodeInputSequence(encoder, input_tensor, encoder_hidden, input_length):
    for i in range(input_length):
        encoder_output, encoder_hidden = encoder(input_tensor[i], encoder_hidden)
    return encoder_hidden

def decodeWithTeacherForcing(decoder, decoder_input, decoder_hidden, output_tensor, criterion, output_length, loss):
    for i in range(output_length):
        decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
        decoder_input = output_tensor[i]
        loss = loss + criterion(decoder_output, output_tensor[i])
    return loss

def decodeWithoutTeacherForcing(decoder, decoder_input, decoder_hidden, output_tensor, criterion, output_length, loss, enable_gpu):
    for i in range(output_length):
        decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
        _,topi= decoder_output.data.topk(1)
        decoder_input = topi.squeeze().detach()
        if enable_gpu:
            decoder_input = decoder_input.cuda()
        loss =loss + criterion(decoder_output, output_tensor[i])
    return loss


In [23]:
def trainAndEvaluate(encoder, decoder, train_loader, val_loader, configuration, max_len, max_len_all,input_lang,output_lang):
    #print("checkpoint-6")
    z=configuration['learning_rate']
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=z)
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=z)
    criterion = nn.NLLLoss()

    for epoch in range(configuration['epochs']):
        print('Epoch {}/{}'.format(epoch + 1, configuration['epochs']))
        train_loss = 0
   
        batch_no = 1
        for batch_input, batch_output in train_loader:
            loss = None
            if not configuration['attention']:
                # print("checkpoint-7")
                loss = train(batch_input, batch_output, encoder, decoder,train_loader,val_loader,encoder_optimizer, decoder_optimizer, criterion, configuration, max_len_all,configuration['batch_size'])
                # print("checkpoint-8")

        train_loss += loss
        batch_no += 1

        print('Train loss: {}'.format(train_loss / len(train_loader)))

        validation_accuracy, validation_loss = modelEvaluation(encoder, decoder, val_loader, configuration, criterion, max_len, output_lang)
#        print("checkpoint-9")
        print('Validation loss: {}'.format(validation_loss / len(val_loader)))
        print('Validation accuracy: {}'.format(validation_accuracy))
        wandb.log({'validation_loss': validation_loss/len(val_loader), 'validation_accuracy': validation_accuracy, 'train_loss': train_loss/len(train_loader)})

        

# configuration = {
#             "hidden_size" : 256,
#             "input_lang" : 'eng',
#             "output_lang" : 'hin',
#             "cell_type"   : 'RNN',
#             "num_layers_encoder" : 1 ,
#             "num_layers_decoder" : 1,
#             "drop_out"    : 0, 
#             "embedding_size" : 128,
#             "bi_directional" : False,
#             "batch_size" : 128,
#             "attention" : False ,
#             "learning_rate" : 0.001,
#             "epochs":10,
#             "teacher_forcing_ratio": 0.5
#         }



In [24]:
import os
import random
import torch
import torch.utils.data

def prepare_data(configuration,file_path, lang_1, lang_2):
    input_lang, output_lang, pairs, max_input_length, max_target_length = prepareData(file_path, lang_1, lang_2)
    return  input_lang, output_lang, pairs, max_input_length, max_target_length
    

def variables_from_pairs(input_lang, output_lang, pairs, max_len):
    return [input_lang.variable_from_pair(pair, max_len) for pair in pairs]

def train_model(encoder, decoder, train_loader, val_loader, configuration, max_len, max_len_all,input_lang,output_lang,enable_gpu):
    d = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    if(enable_gpu):
      encoder.to(device)
      decoder.to(device)

    trainAndEvaluate(encoder, decoder, train_loader, val_loader, configuration, max_len, max_len_all,input_lang, output_lang)


dir = '/content/aksharantar'

# def train_language_model(dir, lang_1, lang_2, configuration):
#     train_path = os.path.join(dir, lang_2, f"{lang_2}_train.csv")
#     test_path = os.path.join(dir, lang_2, f"{lang_2}_test.csv")
#     validation_path = os.path.join(dir, lang_2, f"{lang_2}_valid.csv")
    
#     data_paths = [train_path, test_path, validation_path]
#     max_lengths = []
#     prepared_data = []

#     for path in data_paths:
#         input_lang, output_lang, pairs, max_input_length, max_target_length = prepare_data(path, lang_1, lang_2)
#         prepared_data.append((input_lang, output_lang, pairs))
#         max_lengths.append((max_input_length, max_target_length))

#     (input_lang, output_lang, pairs), (test_input_lang, test_output_lang, test_pairs), (val_input_lang, val_output_lang, val_pairs) = prepared_data
#     (max_input_length, max_target_length), (max_input_length_test, max_target_length_test), (max_input_length_val, max_target_length_val) = max_lengths

#     return (input_lang, output_lang, pairs, max_input_length, max_target_length), \
#            (test_input_lang, test_output_lang, test_pairs, max_input_length_test, max_target_length_test), \
#            (val_input_lang, val_output_lang, val_pairs, max_input_length_val, max_target_length_val)



def train_language_model(dir, lang_1, lang_2, configuration,batch_size,enable_gpu):
    train_path = os.path.join(dir, lang_2, lang_2 + '_train.csv')
    test_path = os.path.join(dir, lang_2, lang_2 + '_test.csv')
    validation_path = os.path.join(dir, lang_2, lang_2 + '_valid.csv')
    input_lang, output_lang, pairs, max_input_length, max_target_length = prepare_data(configuration,train_path, lang_1, lang_2,)
    test_input_lang, test_output_lang, test_pairs, max_input_length_test, max_target_length_test = prepare_data(configuration,test_path, lang_1, lang_2)
    val_input_lang, val_output_lang, val_pairs, max_input_length_val, max_target_length_val = prepare_data(configuration,validation_path, lang_1, lang_2)
    
    print("checkpoint-1")
    print(random.choice(pairs))

    
    max_list = [max_input_length, max_target_length, max_input_length_val, max_target_length_val, max_input_length_test, max_target_length_test]
    max_len_all = sorted(max_list)[-1]
    max_len = max(max_input_length, max_target_length)
    max_len +=2
    print("checkpoint-2")


   
    pairs = variablesFromPairs(input_lang, output_lang, pairs, max_len)
    val_pairs = variablesFromPairs(input_lang, output_lang, val_pairs, max_len_all)
    print("checkpoint-3")
        

    text_encoder = encodeText(input_lang.n_chars, configuration)
    text_decoder = decodeText(configuration, output_lang.n_chars)
    print("checkpoint-4")
    trainDataloader = torch.utils.data.DataLoader(pairs, batch_size=batch_size, shuffle=True)
    valDataloader = torch.utils.data.DataLoader(val_pairs, batch_size=batch_size, shuffle=True)

    

    if not configuration['attention']:
        train_model(text_encoder,text_decoder, trainDataloader, valDataloader, configuration, max_len, max_len_all,input_lang,output_lang,enable_gpu)
        print("Code is successfully Executed...")



        

In [25]:
# enable_gpu= torch.cuda.is_available()
# train_language_model(dir, token_mapping['lang_1'], token_mapping['lang_2'], configuration,configuration['batch_size'],enable_gpu)

In [None]:
import wandb
sweepConfiguration ={
    'method':'bayes'
}

metric = {
    'name' : 'validation_accuracy',
    'goal' : 'maximize'
}
sweepConfiguration['metric'] = metric

hyperParameters={
    'inputLanguage':{
        'values':['hin']
    },
    'batchSize':{
        'values' : [32,64,128]
    },
    'teacherForcingRatio':{
        'values': [0.5]
    },
    'epochs':{
        'values':[5,10,15,20]
    },
    'embeddingDim':{
        'values' : [64,128,256,512]
    },

    'learningRate':{
        'values' : [1e-2,1e-3,1e-1]
    },
    'recurrentCell':{
        'values' : ['GRU','RNN','LSTM']
    },
    'outputLanguage':{
        'values':['eng']
    },
    'decoderLayers':{
        'values' : [1,2,3]
    },
    'hiddenSize':{
        'values' : [128,256,512]
    },
    'dropOutRate':{
        'values' : [0.0,0.2,0.3]
    },
    
    
    'biDirectional':{
        'values' : [True,False]
    },
     'encoderLayers':{
        'values' : [1,2,3]
    }
}
sweepConfiguration['parameters'] =hyperParameters

sweep_id = wandb.sweep(sweepConfiguration, project = 'dl_assignement_3')


def sweepfunction():
    config = None
    with wandb.init(config = config, entity = 'cs22m024') as run:
        config = wandb.config
        run.name='hl_'+str(config.hiddenSize)+'_bs_'+str(config.batchSize)+'_ct_'+config.recurrentCell+'_lr_'+str(config.learningRate)
        configuration = {
            "hidden_size" : config.hiddenSize,
            "input_lang" : config.inputLanguage,
            "teacher_forcing_ratio":config.teacherForcingRatio,
            "cell_type"   : config.recurrentCell,
            "attention" : False ,
            "learning_rate" :config.learningRate,
            "num_layers_decoder" : config.encoderLayers,
            "epochs":config.epochs,
            "drop_out"    :config.dropOutRate, 
            "embedding_size" : config.embeddingDim,
            "bi_directional" : config.biDirectional,
            "batch_size" : config.batchSize,
            "num_layers_encoder" : config.encoderLayers,
            "output_lang" : config.outputLanguage
           
            
    
        }
        train_language_model(dir, token_mapping['lang_1'], token_mapping['lang_2'], configuration,configuration['batch_size'],enable_gpu)
        


wandb.agent(sweep_id, sweepfunction, count = 5)



Create sweep with ID: 735twhle
Sweep URL: https://wandb.ai/cs22m024/dl_assignement_3/sweeps/735twhle


[34m[1mwandb[0m: Agent Starting Run: 8n19pd67 with config:
[34m[1mwandb[0m: 	batchSize: 32
[34m[1mwandb[0m: 	biDirectional: False
[34m[1mwandb[0m: 	decoderLayers: 1
[34m[1mwandb[0m: 	dropOutRate: 0
[34m[1mwandb[0m: 	embeddingDim: 128
[34m[1mwandb[0m: 	encoderLayers: 1
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hiddenSize: 256
[34m[1mwandb[0m: 	inputLanguage: hin
[34m[1mwandb[0m: 	learningRate: 0.001
[34m[1mwandb[0m: 	outputLanguage: eng
[34m[1mwandb[0m: 	recurrentCell: GRU
[34m[1mwandb[0m: 	teacherForcingRatio: 0.5


checkpoint-1
['prakarhai', 'प्रकारहै']
checkpoint-2
checkpoint-3
checkpoint-4
Epoch 1/5
Train loss: 0.0002996120415627956
Validation loss: 0.42542631637591566
Validation accuracy: 15.087890625
Epoch 2/5
