<a href="https://colab.research.google.com/github/Amit-Prasad/transliteration/blob/main/transliteration_devnagri.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Download the Dakshina dataset archive (about 1.9 GB).
# -nc (no-clobber) skips the download when the file is already present, so
# re-running the notebook does not fetch a second copy under a ".1" suffix.
!wget -nc https://storage.googleapis.com/gresearch/dakshina/dakshina_dataset_v1.0.tar

--2023-12-06 13:06:14--  https://storage.googleapis.com/gresearch/dakshina/dakshina_dataset_v1.0.tar
Resolving storage.googleapis.com (storage.googleapis.com)... 172.217.194.207, 142.250.4.207, 172.253.118.207, ...
Connecting to storage.googleapis.com (storage.googleapis.com)|172.217.194.207|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 2008340480 (1.9G) [application/x-tar]
Saving to: ‘dakshina_dataset_v1.0.tar’


2023-12-06 13:08:05 (17.5 MB/s) - ‘dakshina_dataset_v1.0.tar’ saved [2008340480/2008340480]



In [None]:
# Unpack the archive into ./dakshina_dataset_v1.0/.
# The verbose flag is dropped: listing every extracted file floods the cell output.
!tar -xf dakshina_dataset_v1.0.tar

dakshina_dataset_v1.0/bn/
dakshina_dataset_v1.0/bn/lexicons/
dakshina_dataset_v1.0/bn/lexicons/bn.translit.sampled.test.tsv
dakshina_dataset_v1.0/bn/lexicons/bn.translit.sampled.train.tsv
dakshina_dataset_v1.0/bn/lexicons/bn.translit.sampled.dev.tsv
dakshina_dataset_v1.0/bn/native_script_wikipedia/
dakshina_dataset_v1.0/bn/native_script_wikipedia/bn.wiki-filt.valid.text.shuf.txt.gz
dakshina_dataset_v1.0/bn/native_script_wikipedia/bn.wiki-full.info.sorted.tsv.gz
dakshina_dataset_v1.0/bn/native_script_wikipedia/bn.wiki-filt.train.info.sorted.tsv.gz
dakshina_dataset_v1.0/bn/native_script_wikipedia/bn.wiki-filt.train.text.sorted.tsv.gz
dakshina_dataset_v1.0/bn/native_script_wikipedia/bn.wiki-filt.train.text.shuf.txt.gz
dakshina_dataset_v1.0/bn/native_script_wikipedia/bn.wiki-full.nonblock.sections.tsv.gz
dakshina_dataset_v1.0/bn/native_script_wikipedia/bn.wiki-full.omit_pages.txt.gz
dakshina_dataset_v1.0/bn/native_script_wikipedia/bn.wiki-full.text.sorted.tsv.gz
dakshina_dataset_v1.0/bn/na

In [None]:
from __future__ import unicode_literals, print_function, division
from io import open
import unicodedata
import string
import re
import math
import time
import random
import numpy as np
import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
import os


# Disabled alternative device selection; `xm` is not imported anywhere in this notebook.
#device = xm.xla_device()
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [None]:
# Module-level registry that prepareData() fills with the vocabularies,
# sequence lengths, encoded arrays and raw texts used by the later cells.
data_features = {}

In [None]:
def prepareData(data_path):
    """Load a transliteration lexicon and vectorise it into ``data_features``.

    Each non-empty line of the file must hold three tab-separated fields:
    the target (native-script) word, the input (romanised) word, and a third
    field that is ignored. The lines are shuffled with the global ``random``
    state before encoding.

    The following keys are written to the module-level ``data_features`` dict:

    * ``input_token_index`` / ``target_token_index``: char -> integer index.
    * ``num_encoder_tokens`` / ``num_decoder_tokens``: vocabulary sizes.
    * ``max_encoder_seq_length`` / ``max_decoder_seq_length``.
    * ``encoder_input_data``: int array (samples, max_encoder_seq_length) of
      input character indices, padded with the index of ``" "``.
    * ``decoder_target_data``: one-hot int array
      (samples, max_decoder_seq_length, num_decoder_tokens); every target
      ends with ``"\\n"`` and is padded with ``" "``.
    * ``mask``: bool array (samples, max_decoder_seq_length), True on real
      (non-padding) target positions.
    * ``encoder_input_text_data`` / ``decoder_target_text_data``: raw strings
      (targets include the trailing ``"\\n"``).

    Args:
        data_path: path of the UTF-8 encoded TSV lexicon.
    """
    # Read through a context manager so the file handle is always released.
    with open(data_path, 'rt', encoding='utf-8') as f:
        lines = f.read().strip().split('\n')
    random.shuffle(lines)

    input_texts = []
    target_texts = []
    input_characters = set()
    target_characters = set()
    # strip() above already removed the trailing newline, so every line is a
    # real sample; none of them is discarded here.
    for line in lines:
        if not line:
            continue
        target_text, input_text, _ = line.split("\t")
        # "\t" is reserved as the start-of-sequence character for the
        # targets and "\n" marks the end of a sequence.
        target_text = target_text + "\n"
        input_texts.append(input_text)
        target_texts.append(target_text)
        input_characters.update(input_text)
        target_characters.update(target_text)

    # " " is the padding symbol on both sides; "\t" is the decoder start symbol.
    input_characters.add(" ")
    target_characters.add(" ")
    target_characters.add("\t")
    input_characters = sorted(input_characters)
    target_characters = sorted(target_characters)
    num_encoder_tokens = len(input_characters)
    num_decoder_tokens = len(target_characters)
    max_encoder_seq_length = max((len(txt) for txt in input_texts), default=0)
    max_decoder_seq_length = max((len(txt) for txt in target_texts), default=0)

    print("Number of samples:", len(input_texts))
    print("Number of unique input tokens:", num_encoder_tokens)
    print("Number of unique output tokens:", num_decoder_tokens)
    print("Max sequence length for inputs:", max_encoder_seq_length)
    print("Max sequence length for outputs:", max_decoder_seq_length)

    input_token_index = {char: i for i, char in enumerate(input_characters)}
    target_token_index = {char: i for i, char in enumerate(target_characters)}

    encoder_input_data = np.zeros(
        (len(input_texts), max_encoder_seq_length), dtype="int"
    )
    decoder_target_data = np.zeros(
        (len(input_texts), max_decoder_seq_length, num_decoder_tokens), dtype="int"
    )
    mask = np.zeros(
        (len(input_texts), max_decoder_seq_length), dtype="bool"
    )

    input_pad = input_token_index[" "]
    target_pad = target_token_index[" "]
    for i, (input_text, target_text) in enumerate(zip(input_texts, target_texts)):
        for t, char in enumerate(input_text):
            encoder_input_data[i, t] = input_token_index[char]
        # Pad from the word length onwards; using len() instead of the loop
        # variable keeps this correct for an empty word.
        encoder_input_data[i, len(input_text):] = input_pad

        for t, char in enumerate(target_text):
            decoder_target_data[i, t, target_token_index[char]] = 1  # one-hot encoding
            mask[i, t] = True
        # Padding positions are one-hot " " and stay False in the mask.
        decoder_target_data[i, len(target_text):, target_pad] = 1

    data_features.update({
        'input_token_index': input_token_index,
        'target_token_index': target_token_index,
        'num_encoder_tokens': num_encoder_tokens,
        'num_decoder_tokens': num_decoder_tokens,
        'max_encoder_seq_length': max_encoder_seq_length,
        'max_decoder_seq_length': max_decoder_seq_length,
        'encoder_input_data': encoder_input_data,
        'decoder_target_data': decoder_target_data,
        'encoder_input_text_data': input_texts,
        'decoder_target_text_data': target_texts,
        'mask': mask,
    })

# Build the vocabularies and encoded arrays from the Hindi training lexicon.
prepareData('dakshina_dataset_v1.0/hi/lexicons/hi.translit.sampled.train.tsv')

Number of samples: 44203
Number of unique input tokens: 27
Number of unique output tokens: 66
Max sequence length for inputs: 20
Max sequence length for outputs: 20


In [None]:
class EncoderRNN(nn.Module):
    """Character encoder: an embedding layer feeding a recurrent stack.

    Args:
        input_size: number of distinct input tokens (embedding rows).
        hidden_size: width of both the embedding and the recurrent state.
        batch_size: accepted for signature compatibility; not used.
        cell_type: 'rnn' or 'lstm'; any other value selects a GRU.
        n_layers: number of stacked recurrent layers.
        dropout: dropout value handed to the recurrent module.
    """

    def __init__(self, input_size, hidden_size, batch_size, cell_type, n_layers, dropout):
        super().__init__()
        self.hidden_size = hidden_size
        self.layers = n_layers

        self.embedding = nn.Embedding(input_size, hidden_size)
        # Unknown cell types fall back to GRU.
        recurrent_cls = {'rnn': nn.RNN, 'lstm': nn.LSTM}.get(cell_type, nn.GRU)
        self.encoder = recurrent_cls(
            hidden_size, hidden_size, n_layers, dropout=dropout, batch_first=True
        )

    def forward(self, input, hidden):
        """Embed ``input`` and run it through the recurrent stack.

        Returns the ``(output, hidden)`` pair produced by the recurrent module.
        """
        return self.encoder(self.embedding(input), hidden)

    def initHidden(self, batch_size):
        """Return an all-zero state of shape (layers, batch_size, hidden_size)."""
        state_shape = (self.layers, batch_size, self.hidden_size)
        return torch.zeros(*state_shape, device=device)

In [None]:
class DecoderRNN(nn.Module):
    """Single-step character decoder without attention.

    Args:
        hidden_size: width of the embedding and of the recurrent state.
        output_size: number of distinct target tokens.
        batch_size: accepted for signature compatibility; not used.
        cell_type: 'rnn' or 'lstm'; any other value selects a GRU.
        n_layers: number of stacked recurrent layers.
        dropout: dropout value handed to the recurrent module.
    """

    def __init__(self, hidden_size, output_size, batch_size, cell_type, n_layers, dropout):
        super().__init__()
        self.hidden_size = hidden_size
        self.layers = n_layers

        self.embedding = nn.Embedding(output_size, hidden_size)
        # Unknown cell types fall back to GRU.
        recurrent_cls = {'rnn': nn.RNN, 'lstm': nn.LSTM}.get(cell_type, nn.GRU)
        self.decoder = recurrent_cls(
            hidden_size, hidden_size, n_layers, dropout=dropout, batch_first=True
        )

        self.out = nn.Linear(hidden_size, output_size)
        self.softmax = nn.Softmax(dim=2)

    def forward(self, input, hidden):
        """Decode one time step.

        The token indices are embedded, passed through ReLU and reshaped to
        (-1, 1, hidden_size) before the recurrent step. Returns softmax
        probabilities over the target vocabulary and the new hidden state.
        """
        step_input = F.relu(self.embedding(input)).view(-1, 1, self.hidden_size)
        rnn_output, hidden = self.decoder(step_input, hidden)
        probabilities = self.softmax(self.out(rnn_output))
        return probabilities, hidden

    def initHidden(self, batch_size):
        """Return an all-zero state of shape (layers, batch_size, hidden_size)."""
        state_shape = (self.layers, batch_size, self.hidden_size)
        return torch.zeros(*state_shape, device=device)

In [None]:
class AttnDecoderRNN(nn.Module):
    """Single-step character decoder with attention over the encoder outputs.

    Attention scores are produced by a linear layer applied to the
    concatenation of the embedded input token and the top-layer hidden state;
    the layer emits ``max_length`` scores, so ``encoder_outputs`` must have
    exactly ``max_length`` time steps.

    Args:
        hidden_size: width of the embedding and of the recurrent state.
        output_size: number of distinct target tokens.
        batch_size: accepted for signature compatibility; not used.
        cell_type: 'rnn' or 'lstm'; any other value selects a GRU.
        n_layers: number of stacked recurrent layers.
        max_length: number of encoder time steps attended over.
        dropout: dropout value handed to the recurrent module.
        dropout_p: dropout probability applied to the token embedding.
    """

    def __init__(self, hidden_size, output_size, batch_size, cell_type, n_layers, max_length, dropout, dropout_p=0.1):
        super(AttnDecoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.dropout_p = dropout_p
        self.max_length = max_length
        self.cell_type = cell_type
        # Needed by initHidden(); previously missing, which raised AttributeError.
        self.layers = n_layers

        self.embedding = nn.Embedding(self.output_size, self.hidden_size)
        self.attn = nn.Linear(self.hidden_size * 2, self.max_length)
        self.attn_combine = nn.Linear(self.hidden_size * 2, self.hidden_size)
        self.dropout = nn.Dropout(self.dropout_p)
        # Unknown cell types fall back to GRU.
        recurrent_cls = {'rnn': nn.RNN, 'lstm': nn.LSTM}.get(cell_type, nn.GRU)
        self.decoder = recurrent_cls(
            self.hidden_size, self.hidden_size, n_layers, dropout=dropout, batch_first=True
        )

        self.out = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, input, hidden, encoder_outputs):
        """Decode one time step.

        Args:
            input: token indices; reshaped to (batch, 1).
            hidden: recurrent state; an ``(h, c)`` tuple when cell_type is
                'lstm', otherwise a single tensor of shape
                (layers, batch, hidden_size).
            encoder_outputs: tensor of shape (batch, max_length, hidden_size).

        Returns:
            ``(output, hidden, attn_weights)`` where ``output`` holds softmax
            probabilities of shape (batch, 1, output_size) and
            ``attn_weights`` has shape (batch, 1, max_length).
        """
        input = input.view(-1, 1)
        embedded = self.dropout(self.embedding(input))  # (batch, 1, hidden)

        # Query the attention with the top layer's hidden state. For a single
        # layer this equals the former view(-1, 1, hidden) reshape, and it
        # stays shape-correct for stacked layers.
        h_state = hidden[0] if self.cell_type == 'lstm' else hidden
        query = h_state[-1].unsqueeze(1)  # (batch, 1, hidden)

        attn_weights = F.softmax(self.attn(torch.cat((embedded, query), 2)), dim=2)
        attn_applied = torch.bmm(attn_weights, encoder_outputs)

        output = torch.cat((embedded, attn_applied), 2)
        output = F.relu(self.attn_combine(output))

        # Exactly one recurrent step per call (the cell used to be applied twice).
        output, hidden = self.decoder(output, hidden)

        output = F.softmax(self.out(output), dim=2)
        return output, hidden, attn_weights

    def initHidden(self, batch_size):
        """Return an all-zero state of shape (layers, batch_size, hidden_size).

        The tensor is created on the device that holds the module's weights.
        """
        return torch.zeros(self.layers, batch_size, self.hidden_size,
                           device=self.embedding.weight.device)

In [None]:
# Convert the NumPy arrays built by prepareData() to tensors on the selected device.
encoder_input_tensor = torch.from_numpy(data_features['encoder_input_data']).to(device)
# Disabled: prepareData() does not store a 'decoder_input_data' entry.
#decoder_input_tensor = torch.from_numpy(data_features['decoder_input_data']).to(device)
decoder_target_tensor = torch.from_numpy(data_features['decoder_target_data']).to(device)
# Boolean mask that is True on real (non-padding) target positions.
mask = torch.from_numpy(data_features['mask']).to(device)
print(encoder_input_tensor.size())
#print(decoder_input_tensor.size())
print(decoder_target_tensor.size())

torch.Size([44203, 20])
torch.Size([44203, 20, 66])


In [None]:
def maskedCEloss(input, target, mask):
    """Cross-entropy over the unmasked rows of one decoding step.

    Args:
        input: (batch, vocab) tensor of predicted probabilities (already
            softmax-normalised, not logits).
        target: (batch,) tensor of gold token indices.
        mask: (batch,) tensor that is truthy for rows that count towards the
            loss (real characters) and falsy for padding rows.

    Returns:
        ``(loss, nTotal)``: the mean negative log-likelihood over the
        unmasked rows as a 0-dim tensor (zero when nothing is unmasked), and
        the number of unmasked rows as a Python number.
    """
    nTotal = mask.sum()
    picked = torch.gather(input, 1, target.view(-1, 1)).squeeze(1)
    # Clamp before the log: a probability that underflowed to exactly 0 would
    # otherwise give inf here and NaN gradients.
    crossEntropy = -torch.log(picked.clamp(min=1e-12))

    if nTotal.item() > 0:
        # Average only over real positions so padding does not dilute the loss.
        loss = crossEntropy.masked_select(mask.to(torch.bool)).mean()
    else:
        # Nothing to score; keep a tensor attached to the graph with value 0.
        loss = crossEntropy.sum() * 0.0

    loss_value = loss.item()
    if math.isnan(loss_value) or math.isinf(loss_value):
      print(f"Nan found : {input}, {target}, {mask}")

    return loss, nTotal.item()

In [None]:
def getOptimizer(name,enc_params,dec_params,lr):
    """Create matching optimizers for the encoder and the decoder.

    Args:
        name: one of 'rmsprop', 'adam' or 'sparseadam'.
        enc_params: parameters of the encoder.
        dec_params: parameters of the decoder.
        lr: learning rate used for both optimizers.

    Returns:
        ``(enc_opt, dec_opt)``.

    Raises:
        ValueError: if ``name`` is not a supported optimizer (previously this
            surfaced as an UnboundLocalError at the return statement).
    """
    if name == 'rmsprop':
        enc_opt = optim.RMSprop(enc_params, lr, alpha=0.99, eps=1e-08, weight_decay=0, momentum=0, centered=False)
        dec_opt = optim.RMSprop(dec_params, lr, alpha=0.99, eps=1e-08, weight_decay=0, momentum=0, centered=False)
    elif name == 'adam':
        enc_opt = optim.Adam(enc_params, lr, betas=(0.9, 0.999), eps=1e-08, weight_decay=0, amsgrad=False)
        dec_opt = optim.Adam(dec_params, lr, betas=(0.9, 0.999), eps=1e-08, weight_decay=0, amsgrad=False)
    elif name == 'sparseadam':
        # NOTE(review): SparseAdam only accepts sparse gradients; the models in
        # this notebook use dense embeddings, so confirm before selecting it.
        enc_opt = optim.SparseAdam(enc_params, lr, betas=(0.9, 0.999), eps=1e-08)
        dec_opt = optim.SparseAdam(dec_params, lr, betas=(0.9, 0.999), eps=1e-08)
    else:
        raise ValueError(
            f"Unknown optimizer {name!r}; expected 'rmsprop', 'adam' or 'sparseadam'"
        )

    return (enc_opt,dec_opt)

In [None]:
def train(input_tensor, target_tensor, masked, encoder, decoder, cell_type, batch_size, encoder_optimizer, decoder_optimizer, criterion, clip = 50.0,attention=0):
    """Run one teacher-forced optimisation step on a single batch.

    Args:
        input_tensor: (batch, src_len) tensor of input token indices.
        target_tensor: (batch, tgt_len, vocab) one-hot target tensor.
        masked: (batch, tgt_len) mask, truthy on real target positions.
        encoder, decoder: the models to update.
        cell_type: 'lstm' makes the initial state an ``(h, c)`` tuple.
        batch_size: nominal batch size. The recurrent state and the loss
            normalisation use the number of rows actually present in
            ``input_tensor``, so a short final batch is handled.
        encoder_optimizer, decoder_optimizer: optimizers stepped after backward.
        criterion: unused; kept for signature compatibility.
        clip: gradient-norm clipping threshold, applied only when ``clip > 1``.
        attention: 1 when ``decoder`` also takes the encoder outputs.

    Returns:
        The summed per-step loss divided by
        ``input_tensor.size(1) * rows_in_batch``.
    """
    rows = input_tensor.size(0)
    if cell_type=='lstm':
        encoder_hidden = (encoder.initHidden(rows), encoder.initHidden(rows))
    else:
        encoder_hidden = encoder.initHidden(rows)

    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()
    encoder_output, encoder_hidden = encoder(
            input_tensor, encoder_hidden)

    decoder_hidden = encoder_hidden

    # Gold token indices for every step, shape (batch, tgt_len).
    target_indices = torch.argmax(target_tensor, dim=2)

    # Teacher forcing: the first decoder input is the start symbol "\t".
    # It is fed as an index directly; taking another argmax over this
    # (batch, 1) tensor would always yield 0 regardless of the start index.
    sos_index = data_features['target_token_index']['\t']
    decoder_input = torch.full((rows, 1), sos_index, dtype=torch.long,
                               device=target_tensor.device)

    loss=0
    for i in range(target_tensor.size()[1]):
        if attention == 1:
            decoder_output, decoder_hidden, decoder_attention = decoder(
                decoder_input, decoder_hidden, encoder_output)
        else:
            decoder_output, decoder_hidden = decoder(
                    decoder_input, decoder_hidden)

        # Skip steps where every row is padding.
        if (masked[:,i].sum()!=0):
            # squeeze(1) drops only the time axis, so a batch of one row
            # keeps its batch dimension.
            mask_loss, nTotal = maskedCEloss(decoder_output.squeeze(1).float(),target_indices[:,i],masked[:,i])
            loss+=mask_loss

        # Next input is the gold token of this step.
        decoder_input = target_indices[:,i].view(-1,1)

    if not torch.is_tensor(loss):
        # No step contributed to the loss, so there is nothing to optimise.
        return 0.0

    loss.backward()

    if (clip > 1):
        _ = nn.utils.clip_grad_norm_(encoder.parameters(), clip)
        _ = nn.utils.clip_grad_norm_(decoder.parameters(), clip)

    encoder_optimizer.step()
    decoder_optimizer.step()

    avg_loss = loss.item() / (input_tensor.size()[1]*rows)
    if math.isnan(avg_loss) or math.isinf(avg_loss):
      print(f'Loss num : {loss.item()} ; Loss Deno : {input_tensor.size()[1] , batch_size}')
    return avg_loss




def trainIters(encoder, decoder, cell_type, n_epochs, batch_size, val_split,optimizer = 'rmsprop', learning_rate=0.001,clip = 0.0, attention=0):
    """Train the encoder/decoder pair on the leading part of the dataset.

    Reads the module-level ``encoder_input_tensor``, ``decoder_target_tensor``
    and ``mask``. The first ``int(N * (1 - val_split))`` samples are used for
    training; the rest are left untouched for validation.

    Args:
        encoder, decoder: models to train.
        cell_type: forwarded to ``train`` ('lstm' selects tuple states).
        n_epochs: number of passes over the training portion.
        batch_size: maximum number of samples per optimisation step.
        val_split: fraction of samples held out from training.
        optimizer: optimizer name understood by ``getOptimizer``.
        learning_rate: learning rate for both optimizers.
        clip: gradient clipping threshold forwarded to ``train``.
        attention: 1 when ``decoder`` is an attention decoder.
    """
    start = time.time()

    encoder_optimizer, decoder_optimizer = getOptimizer(optimizer,encoder.parameters(),decoder.parameters(),learning_rate)

    train_end_index = int(encoder_input_tensor.size()[0]*(1-val_split))

    criterion = nn.CrossEntropyLoss()

    # Make sure dropout is active even if the models were left in eval mode.
    encoder.train()
    decoder.train()

    for i in range(n_epochs):
        total_loss=0
        n_batches=0
        for j in range(0, train_end_index, batch_size):
            # Clip the final slice so it never reaches into the validation samples.
            stop = min(j+batch_size, train_end_index)
            input_tensor = encoder_input_tensor[j:stop]
            target_tensor = decoder_target_tensor[j:stop]
            masked = mask[j:stop]
            # Pass the real row count: the last batch may be shorter than batch_size.
            total_loss += train(input_tensor, target_tensor, masked, encoder,
                     decoder, cell_type, input_tensor.size()[0], encoder_optimizer, decoder_optimizer, criterion, clip,attention)
            n_batches += 1
        # Mean of the per-batch losses; NaN when there was nothing to train on.
        avg_loss = float(total_loss)/n_batches if n_batches else float('nan')
        if math.isnan(avg_loss) or math.isinf(avg_loss):
            print({'incorrect_loss' : f'{total_loss},{train_end_index},{batch_size}'})
        print(f'Epoch {i+1} ; Loss : {avg_loss}')
    end=time.time()
    print("Time Taken "+str(end-start))









In [None]:
# Hyper-parameters for the vanilla (no attention) model.
hidden_size = 1024
batch_size = 64
# Two-layer LSTM encoder and decoder with dropout 0.1 between the layers.
encoder1 = EncoderRNN(data_features['num_encoder_tokens'], hidden_size, batch_size, 'lstm', 2, 0.1).to(device)
decoder1 = DecoderRNN(hidden_size, data_features['num_decoder_tokens'], batch_size, 'lstm', 2, 0.1).to(device)
# Disabled attention variant of the decoder.
#attn_decoder1 = AttnDecoderRNN(hidden_size, data_features['num_decoder_tokens'], batch_size, 'lstm', 1, data_features['max_encoder_seq_length'], dropout_p=0.1, dropout=0).to(device)
# 20 epochs with Adam, holding out 10% of the samples from training.
trainIters(encoder1, decoder1, 'lstm', 20, batch_size, 0.1, optimizer = 'adam', learning_rate=0.001, clip=3000,attention=0)

Epoch 1 ; Loss : 0.009588249565986754
Epoch 2 ; Loss : 0.00275625240017066
Epoch 3 ; Loss : 0.0018141970091602997
Epoch 4 ; Loss : 0.0013127336684685775
Epoch 5 ; Loss : 0.0009964150326097732
Epoch 6 ; Loss : 0.0008069863688154085
Epoch 7 ; Loss : 0.0006860249250157869
Epoch 8 ; Loss : 0.0006095264425186489
Epoch 9 ; Loss : 0.0005659350665106111
Epoch 10 ; Loss : 0.0005165679206028765
Epoch 11 ; Loss : 0.00047878336147703243
Epoch 12 ; Loss : 0.0004448100369753065
Epoch 13 ; Loss : 0.00042970656191584705
Epoch 14 ; Loss : 0.0004027592843331683
Epoch 15 ; Loss : 0.0003853243688999528
Epoch 16 ; Loss : 0.00035514701855445706
Epoch 17 ; Loss : 0.0003440992332754091
Epoch 18 ; Loss : 0.0003375482949656328
Epoch 19 ; Loss : 0.0003279341196078629
Epoch 20 ; Loss : 0.00032041225327466004
Time Taken 1284.7300944328308


In [None]:
def tensorPairs(word1, word2):
    """Encode an (input word, target word) pair as index column tensors.

    ``word1`` is looked up in ``data_features['input_token_index']`` and
    ``word2`` in ``data_features['target_token_index']``. Each result is a
    long tensor of shape (len(word), 1) on the global ``device``.
    """
    source_ids = [data_features['input_token_index'][ch] for ch in word1]
    source_column = torch.tensor(source_ids, dtype=torch.long, device=device).view(-1, 1)

    target_ids = [data_features['target_token_index'][ch] for ch in word2]
    target_column = torch.tensor(target_ids, dtype=torch.long, device=device).view(-1, 1)

    return (source_column, target_column)


def get_key(d, val):
    """Return every key of ``d`` whose value equals ``val``, in dict order."""
    matches = []
    for key in d:
        if d[key] == val:
            matches.append(key)
    return matches

def WordTensor(tensor):
    """Return the list of target characters whose vocabulary index equals ``tensor``."""
    target_vocab = data_features['target_token_index']
    return get_key(target_vocab, tensor)


def evaluate(encoder, decoder, cell_type, input_tensor, max_length=data_features['max_encoder_seq_length'], attention=1):
    """Greedily transliterate one word.

    Both models are switched to eval mode for the duration of the call (so
    dropout does not randomise the prediction) and are restored to their
    previous mode afterwards.

    Args:
        encoder, decoder: trained models.
        cell_type: 'lstm' makes the initial state an ``(h, c)`` tuple.
        input_tensor: (length, 1) tensor of input token indices.
        max_length: maximum number of decoding steps and size of the
            attention buffer. The default is read from ``data_features`` once,
            when this function is defined.
        attention: 1 when ``decoder`` also takes the encoder outputs.

    Returns:
        The decoded string, without the terminating "\\n".
    """
    target_vocab = data_features['target_token_index']
    # Build the reverse lookup once instead of scanning the vocabulary for
    # every decoded character.
    index_to_char = {idx: ch for ch, idx in target_vocab.items()}

    encoder_was_training = encoder.training
    decoder_was_training = decoder.training
    encoder.eval()
    decoder.eval()
    try:
        with torch.no_grad():
            input_length = input_tensor.size()[0]

            if cell_type=='lstm':
                encoder_hidden = (encoder.initHidden(1), encoder.initHidden(1))
            else:
                encoder_hidden = encoder.initHidden(1)

            encoder_outputs = torch.zeros(1, max_length, encoder.hidden_size, device=device)

            # Feed the word one character at a time.
            for ei in range(input_length):
                encoder_output, encoder_hidden = encoder(input_tensor[ei].view(1,-1),
                                                         encoder_hidden)
                # The buffer holds max_length steps; longer inputs still
                # update the hidden state but cannot be stored.
                if ei < max_length:
                    encoder_outputs[0,ei] += encoder_output[0, 0]

            decoder_input = torch.tensor([[target_vocab['\t']]], device=device)  # SOS

            decoder_hidden = encoder_hidden

            decoded_word = ''

            for di in range(max_length):
                if attention == 1:
                    decoder_output, decoder_hidden, decoder_attention = decoder(
                        decoder_input, decoder_hidden, encoder_outputs)
                else:
                    decoder_output, decoder_hidden = decoder(
                        decoder_input, decoder_hidden)
                topv, topi = decoder_output.data.topk(1)
                predicted_char = index_to_char[topi.item()]
                if predicted_char == '\n':
                    # End-of-sequence symbol: stop decoding.
                    break
                decoded_word = decoded_word + predicted_char

                # Feed the prediction back as the next input.
                decoder_input = topi.view(1, 1).detach()

            return decoded_word
    finally:
        encoder.train(encoder_was_training)
        decoder.train(decoder_was_training)




In [None]:
def evaluateRandomly(encoder, decoder, cell_type,  n=10,attention=0):
    """Print the input, the reference and the model output for the first ``n`` samples.

    Despite the name, the samples are taken in stored order (indices
    0 .. n-1), not drawn at random.
    """
    for index in range(n):
        source_word = data_features['encoder_input_text_data'][index]
        reference = data_features['decoder_target_text_data'][index]
        source_tensor, _ = tensorPairs(source_word, reference)
        print('>', data_features['encoder_input_text_data'][index])
        # The stored reference ends with "\n"; drop it for display.
        print('=', data_features['decoder_target_text_data'][index][0:-1])
        prediction = evaluate(encoder, decoder, cell_type, source_tensor, attention=attention)
        print('<', prediction)
        print('')

In [None]:
def validate(encoder, decoder, cell_type, val_split, attention=0):
    """Print the exact-match accuracy on the held-out tail of the dataset.

    The validation portion starts at ``int(N * (1 - val_split))``, where N is
    the number of rows in the module-level ``encoder_input_tensor``, and runs
    to the end of the stored samples.

    Args:
        encoder, decoder: trained models.
        cell_type: forwarded to ``evaluate``.
        val_split: fraction of samples reserved for validation.
        attention: 1 when ``decoder`` is an attention decoder.
    """
    inputs = data_features['encoder_input_text_data']
    targets = data_features['decoder_target_text_data']

    # Start exactly where the training portion ends; an extra +1 here would
    # silently drop the first validation sample.
    val_start_index = int(encoder_input_tensor.size()[0]*(1-val_split))
    n_val = len(inputs) - val_start_index
    if n_val <= 0:
        print("No validation samples")
        return

    accuracy=0
    for index in range(val_start_index, len(inputs)):
        pair = tensorPairs(inputs[index], targets[index])
        output_word = evaluate(encoder, decoder, cell_type,  pair[0],attention=attention)

        # Stored targets end with "\n"; compare without it.
        if (output_word == targets[index][0:-1]):
            accuracy+=1
    print(accuracy/n_val)


In [None]:
def prepareTestData(test_path):
    """Read a test lexicon into parallel lists of input and target words.

    Each non-empty line must hold three tab-separated fields: the target
    (native-script) word, the input (romanised) word, and a third field that
    is ignored.

    Args:
        test_path: path of the UTF-8 encoded TSV lexicon.

    Returns:
        ``{'input': [...], 'target': [...]}`` with the words in file order.
    """
    # Context manager so the file handle is released even on a parse error.
    with open(test_path, 'rt', encoding='utf-8') as f:
        lines = f.read().strip().split('\n')

    lang_in_texts = []
    lang_out_texts = []

    for line in lines:
        line = line.strip('\n')
        if not line:
            # Blank lines (or an empty file) carry no sample.
            continue
        lang_out_text, lang_in_text, _ = line.split("\t")
        lang_in_texts.append(lang_in_text)
        lang_out_texts.append(lang_out_text)

    return {'input': lang_in_texts, 'target': lang_out_texts}

def test_validate(encoder, decoder, cell_type,outF, attention = 0):
    """Score the model on the module-level ``test_data`` and log the hits.

    Every test input is decoded with ``evaluate``. Each exact match is
    written to ``outF`` as ``"<input>  <prediction>"`` on its own line and
    echoed to stdout; the exact-match accuracy is printed at the end.

    Args:
        encoder, decoder: trained models.
        cell_type: forwarded to ``evaluate``.
        outF: writable text file object receiving the correct predictions.
        attention: 1 when ``decoder`` is an attention decoder.
    """
    total = len(test_data['target'])
    if total == 0:
        print("No test samples")
        return

    accuracy=0
    for index in range( len(test_data['input'])):
        # Only the input tensor is needed. Encoding the target would raise
        # KeyError for any test character missing from the training vocabulary.
        pair = tensorPairs(test_data['input'][index], '')
        output_word = evaluate(encoder, decoder, cell_type, pair[0],attention = attention)

        if (output_word == test_data['target'][index]):
            accuracy+=1

            l = f"{test_data['input'][index]}  {output_word}"
            outF.write(l)
            outF.write("\n")
            print(l)
    print(accuracy/total)



test_data = prepareTestData('dakshina_dataset_v1.0/hi/lexicons/hi.translit.sampled.test.tsv')

# The predictions contain Devanagari text, so the encoding is fixed to UTF-8
# instead of relying on the platform default. The context manager closes the
# file even if evaluation raises.
with open("best_vanilla_pred.txt", "w", encoding="utf-8") as outF:
    test_validate(encoder1,decoder1,'lstm',outF,attention=0)

antah  अंतः
antrmukh  अंतर्मुख
agvaai  अगवाई
achyut  अच्युत
agyaat  अज्ञात
agyat  अज्ञात
atakta  अटकता
atkata  अटकता
adhivas  अधिवास
adhiwas  अधिवास
apnani  अपनानी
africa  अफ्रीका
amaanat  अमानत
amavasya  अमावस्या
alankaaron  अलंकारों
avarodhak  अवरोधक
avrodhak  अवरोधक
avasthaayen  अवस्थाएं
avsthaen  अवस्थाएं
avsthayen  अवस्थाएं
awasthaen  अवस्थाएं
asmanye  असमान्य
asurakshit  असुरक्षित
item  आइटम
aakaash  आकाश
aakash  आकाश
aake  आके
aagat  आगत
aagaman  आगमन
aagman  आगमन
aagashe  आगाशे
aazmaanaa  आजमाना
aadaan  आदान
aadhaarit  आधारित
aadharit  आधारित
aadheen  आधीन
aadhunik  आधुनिक
aapka  आपका
aapkaa  आपका
aapadaaaen  आपदाएं
aapdaein  आपदाएं
aapdayein  आपदाएं
aamdan  आमदन
aamdani  आमदनी
aamdi  आमदी
aayojak  आयोजक
aaraam  आराम
aarushi  आरुषि
aarya  आर्य
arya  आर्य
aaryika  आर्यिका
aryon  आर्यों
aalsi  आलसी
aalochak  आलोचक
aalochakon  आलोचकों
alochakon  आलोचकों
aalha  आल्हा
aalhaa  आल्हा
awaazon  आवाज़ों
aavishakaar  आविष्कार
aavishkaar  आविष्कार
aavritti  आवृत्ति
aashanka  आशंका
aastik  

In [None]:
# Show input, reference and prediction for the first 100 stored samples.
evaluateRandomly(encoder1, decoder1, 'lstm', n=100,attention=0)


In [None]:
# Exact-match accuracy on the samples held out by the 0.1 validation split.
validate(encoder1, decoder1, 'lstm', 0.1,attention=0)