In [1]:
# Colab only: mount Google Drive at /content/drive so the corpus file read in
# the next cell is reachable from the notebook's filesystem.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import os
from xml.etree import ElementTree

# Location of the XML corpus on the mounted Drive.
file_name = "/content/drive/MyDrive/Job_Assignments/Devnagiri/NEWS2018_M-EnHi_dev.xml"
# file_name is absolute, so os.path.join discards the "xml" component and
# full_file resolves to file_name itself.
full_file = os.path.abspath(os.path.join("xml", file_name))
dom = ElementTree.parse(full_file)
Name = dom.findall("Name")

# Flatten every <Name> element into one "<SourceName>\t<TargetName>" line.
# The separator must be a TAB: the loader cell splits each line on "\t", and a
# space is not a safe delimiter for names. UTF-8 is requested explicitly so
# the Devanagari text does not depend on the platform's default encoding.
with open("Output.txt", "w", encoding="utf-8") as file:
    for f in Name:
        Type = f.find("SourceName").text          # source-side spelling
        Explanation = f.find("TargetName").text   # first target-side spelling
        op = Type + '\t' + Explanation + '\n'
        file.write(op)

In [3]:
# Pin TensorFlow 1.13.2: the cells below use TF1-only APIs (tf.contrib.seq2seq,
# tf.placeholder, tf.InteractiveSession).
!pip install tensorflow==1.13.2

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting tensorflow==1.13.2
  Downloading tensorflow-1.13.2-cp37-cp37m-manylinux1_x86_64.whl (92.7 MB)
[K     |████████████████████████████████| 92.7 MB 33 kB/s 
Collecting tensorboard<1.14.0,>=1.13.0
  Downloading tensorboard-1.13.1-py3-none-any.whl (3.2 MB)
[K     |████████████████████████████████| 3.2 MB 46.3 MB/s 
[?25hCollecting tensorflow-estimator<1.14.0rc0,>=1.13.0
  Downloading tensorflow_estimator-1.13.0-py2.py3-none-any.whl (367 kB)
[K     |████████████████████████████████| 367 kB 55.5 MB/s 
Collecting keras-applications>=1.0.6
  Downloading Keras_Applications-1.0.8-py3-none-any.whl (50 kB)
[K     |████████████████████████████████| 50 kB 7.1 MB/s 
Collecting mock>=2.0.0
  Downloading mock-4.0.3-py3-none-any.whl (28 kB)
Installing collected packages: mock, tensorflow-estimator, tensorboard, keras-applications, tensorflow
  Attempting uninstall: tensorflow-estimator
    Fo

In [4]:
import nltk
from collections import Counter
from tqdm import tqdm_notebook
import numpy as np
import tensorflow as tf
from tensorflow.contrib import seq2seq
from tensorflow.contrib.rnn import DropoutWrapper
import random

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [5]:
# Fetch NLTK's 'punkt' data package.
# NOTE(review): no tokenizer call is visible in this notebook (BLEU is computed
# directly on character sequences) — confirm this download is still needed.
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [6]:
# Every encoded name is padded/truncated to this many ids (EOS included).
MAX_SEQ_LEN = 20
# Examples per batch. The graph below bakes this value in (SOS fill, encoder
# zero state, inference start tokens), so every fed batch must have this size.
BATCH_SIZE = 64

In [7]:
class Lang:
    """Character-level vocabulary with encode/decode helpers.

    Ids 0-3 are reserved for the special tokens ``<PAD>``, ``<SOS>``, ``<EOS>``
    and ``<UNK>``. The ``vocab_size`` most frequent symbols of ``counter`` get
    ids from 4 upward, in descending frequency order.

    Args:
        counter: a ``collections.Counter`` mapping symbol -> frequency.
        vocab_size: maximum number of symbols taken from ``counter``.
    """

    def __init__(self, counter, vocab_size):
        self.word2id = {}
        self.id2word = {}
        self.pad = "<PAD>"
        self.sos = "<SOS>"
        self.eos = "<EOS>"
        self.unk = "<UNK>"

        self.ipad = 0
        self.isos = 1
        self.ieos = 2
        self.iunk = 3

        self.word2id[self.pad] = 0
        self.word2id[self.sos] = 1
        self.word2id[self.eos] = 2
        self.word2id[self.unk] = 3

        self.id2word[0] = self.pad
        self.id2word[1] = self.sos
        self.id2word[2] = self.eos
        self.id2word[3] = self.unk

        curr_id = 4
        for w, c in counter.most_common(vocab_size):
            self.word2id[w] = curr_id
            self.id2word[curr_id] = w
            curr_id += 1

    def _encode_chars(self, s):
        """Map each character of the lower-cased, stripped ``s`` to its id (unknown -> ``iunk``)."""
        return [self.word2id.get(ch, self.iunk) for ch in s.lower().strip()]

    def encodeSentence(self, s, max_len=-1):
        """Encode ``s`` character by character.

        Args:
            s: text to encode; it is lower-cased and stripped first.
            max_len: ``-1`` (default) returns the bare ids with no EOS and no
                padding. Otherwise EOS is appended and the result is padded
                with PAD / truncated to exactly ``max_len`` ids.

        Returns:
            list of int ids.
        """
        ids = self._encode_chars(s)
        if max_len == -1:
            return ids
        return (ids + [self.ieos] + [self.ipad] * max_len)[:max_len]

    def encodeSentence2(self, s, max_len=-1):
        """Encode ``s`` and also report its effective length.

        Args:
            s: text to encode; it is lower-cased and stripped first.
            max_len: fixed output length. ``-1`` (default) means "no limit":
                the ids plus EOS are returned unpadded. Previously the default
                produced a length of -1 and silently dropped the EOS.

        Returns:
            ``(length, ids)`` where ``length`` counts the EOS and is capped at
            ``max_len``, and ``ids`` is the EOS-terminated, PAD-padded sequence
            truncated to ``max_len``.
        """
        ids = self._encode_chars(s)
        if max_len == -1:
            return len(ids) + 1, ids + [self.ieos]
        return min(max_len, len(ids) + 1), \
            (ids + [self.ieos] + [self.ipad] * max_len)[:max_len]

    def decodeSentence(self, id_seq):
        """Turn a sequence of ids back into text, stopping at the first EOS.

        Args:
            id_seq: iterable of ids (list, tuple or 1-D numpy array).

        Returns:
            The concatenated symbols before the first EOS, with ``<UNK>``
            rendered as ``UNK``.
        """
        # list(...) keeps "+" a concatenation even for numpy input, where it
        # would otherwise be element-wise addition. The appended EOS is a
        # sentinel guaranteeing argmax finds a stop position.
        ids = np.array(list(id_seq) + [self.ieos])
        end = np.argmax(ids == self.ieos)
        s = ''.join(self.id2word[x] for x in ids[:end])
        return s.replace(self.unk, "UNK")

In [8]:
# Expected number of name pairs; used as the progress-bar total while reading.
N = 30823
hi_counter = Counter()
hi_sentences = []
en_counter = Counter()
en_sentences = []

# Each line of Output.txt is "<english>\t<hindi>". Read it explicitly as UTF-8
# so the Devanagari side does not depend on the platform default encoding.
with open("Output.txt", encoding="utf-8") as f:
    for line in tqdm_notebook(f, total=N, desc="Reading file:"):
        line = line.strip()
        if not line:
            # A blank (e.g. trailing) line would otherwise crash the unpack below.
            continue
        en, hi = line.split("\t")
        hi_sentences.append(hi)
        en_sentences.append(en)

# Character frequencies per language; the file is no longer needed here, so
# counting happens after it has been closed.
for line in tqdm_notebook(hi_sentences, desc="Processing inputs:"):
    hi_counter.update(line.strip())
for line in tqdm_notebook(en_sentences, desc="Processing inputs:"):
    en_counter.update(line.strip())

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  import sys


Reading file::   0%|          | 0/30823 [00:00<?, ?it/s]

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  # This is added back by InteractiveShellApp.init_path()


Processing inputs::   0%|          | 0/30823 [00:00<?, ?it/s]

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  


Processing inputs::   0%|          | 0/30823 [00:00<?, ?it/s]

In [9]:
# Five most frequent Hindi characters and the number of distinct ones.
print("Most common hi characters in dataset:\n", hi_counter.most_common(5))

print("\nTotal (hi)characters gathered from dataset:",len(hi_counter))

# Five most frequent English characters and the number of distinct ones.
print("\nMost common en characters in dataset:\n", en_counter.most_common(5))

print("\nTotal (en)characters gathered from dataset:", len(en_counter))

Most common hi characters in dataset:
 [('ा', 21123), ('र', 9205), ('े', 8100), ('न', 7225), ('ी', 6546)]

Total (hi)characters gathered from dataset: 66

Most common en characters in dataset:
 [('a', 57220), ('n', 15015), ('i', 14015), ('h', 13805), ('e', 12264)]

Total (en)characters gathered from dataset: 27


In [10]:
# Build one vocabulary per language. vocab_size equals the number of distinct
# characters, so every character seen in the corpus gets its own id.
en_lang = Lang(en_counter, len(en_counter))
hi_lang = Lang(hi_counter, len(hi_counter))

In [11]:
# Smoke test of the vocabularies: encode without padding, then round-trip
# (encode with max_len=10 -> decode) for both languages.
print("Test en encoding:", en_lang.encodeSentence("Shukriya"))

print("Test en decoding:", en_lang.decodeSentence(en_lang.encodeSentence("Shukriya", 10)))

print("Test hindi encoding:", hi_lang.encodeSentence("शुक्रिया", 10))

print("Test hindi decoding:", hi_lang.decodeSentence((hi_lang.encodeSentence("शुक्रिया", 10))))

Test en encoding: [15, 7, 10, 13, 9, 6, 20, 4]
Test en decoding: shukriya
Test hindi encoding: [35, 19, 15, 22, 5, 12, 21, 4, 2, 0]
Test hindi decoding: शुक्रिया


In [12]:
# Vocabulary sizes (English / Hindi), including the four special tokens.
VE = len(en_lang.word2id)
VH = len(hi_lang.word2id)

In [13]:
# Embedding tables: one 300-dimensional row per vocabulary id, for the source
# (English) and target (Hindi) sides respectively.
en_word_emb_matrix = tf.get_variable("en_word_emb_matrix", (VE, 300), dtype=tf.float32)
hi_word_emb_matrix = tf.get_variable("hi_word_emb_matrix", (VH, 300), dtype=tf.float32)

Instructions for updating:
Colocations handled automatically by placer.


In [14]:
# Dropout keep probability; fed as 0.8 during training and 1.0 for inference.
keep_prob = tf.placeholder(tf.float32)

# Source side: padded character ids and the true length of each row.
input_ids = tf.placeholder(tf.int32, (None, MAX_SEQ_LEN))
input_lens = tf.placeholder(tf.int32, (None, ))

# Target side: padded character ids (without SOS) and the true length of each row.
ph_target_ids = tf.placeholder(tf.int32, (None, MAX_SEQ_LEN))
target_lens = tf.placeholder(tf.int32, (None, ))

In [15]:
# Prepend the SOS (GO) id to every target row, giving MAX_SEQ_LEN + 1 columns.
# The SOS column is built with a fixed BATCH_SIZE rows, so ph_target_ids must
# always be fed with exactly BATCH_SIZE rows.
target_ids = tf.concat([tf.fill([BATCH_SIZE,1], hi_lang.isos), ph_target_ids], -1)

In [16]:
# Embed the source ids for the encoder.
input_emb = tf.nn.embedding_lookup(en_word_emb_matrix, input_ids)
# Decoder inputs for teacher forcing: SOS followed by the target ids, with the
# last column dropped so the length is MAX_SEQ_LEN again.
target_emb = tf.nn.embedding_lookup(hi_word_emb_matrix, target_ids[:, :-1])

In [17]:
# Encoder recurrent cell; dropout is applied to its outputs and controlled at
# run time through the keep_prob placeholder.
encoder_cell = tf.nn.rnn_cell.GRUCell(128) # 128 is the dimension of hidden state
encoder_cell = DropoutWrapper(encoder_cell, output_keep_prob=keep_prob) # Adding Dropout for regularization

Instructions for updating:
This class is equivalent as tf.keras.layers.GRUCell, and will be replaced by that in Tensorflow 2.0.


In [18]:
# Run the encoder over the embedded source sequence. enc_state (the final
# state) is what initialises both decoders below.
enc_outputs, enc_state = tf.nn.dynamic_rnn(
    encoder_cell, # The encoder GRU cell
    input_emb, # Embedded input sequence
    sequence_length=input_lens, # Sequence lengths of individual inputs in a batch
    initial_state=encoder_cell.zero_state(BATCH_SIZE, dtype=tf.float32)  # fixes the batch size to BATCH_SIZE
)

Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API
Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


In [19]:
# Decoder recurrent cell, same size as the encoder so enc_state can be used as
# its initial state; output dropout is controlled by keep_prob.
decoder_cell = tf.nn.rnn_cell.GRUCell(128)
decoder_cell = DropoutWrapper(decoder_cell, output_keep_prob=keep_prob)

In [20]:
# Dense layer mapping each decoder output to one logit per Hindi vocabulary id;
# shared by the training and inference decoders.
output_projection = tf.layers.Dense(len(hi_lang.word2id))

In [21]:
# Training decoder (teacher forcing): the helper feeds the embedded ground-truth
# target sequence as decoder input at every step.
helper = seq2seq.TrainingHelper(target_emb, target_lens)
decoder = seq2seq.BasicDecoder(decoder_cell, helper, enc_state, output_projection)
# outputs[0] is used as the logits by the loss cell; outputs_lens holds the
# decoded length of each row.
outputs, _, outputs_lens = seq2seq.dynamic_decode(decoder, maximum_iterations=MAX_SEQ_LEN, 
                                                  impute_finished=False, swap_memory=True)
# Longest decoded length in the batch; the loss mask and labels are cut to this.
output_max_len = tf.reduce_max(outputs_lens)

In [22]:
# Inference decoder: greedy decoding that starts every row from SOS and stops
# at EOS (or after MAX_SEQ_LEN steps). It reuses the same dropout-wrapped
# decoder_cell and output_projection as training; dropout is only switched off
# when keep_prob is fed as 1.0. The start-token vector fixes the batch size to
# BATCH_SIZE.
infer_helper = seq2seq.GreedyEmbeddingHelper(hi_word_emb_matrix, tf.fill([BATCH_SIZE, ], hi_lang.isos), hi_lang.ieos)
infer_decoder = seq2seq.BasicDecoder(decoder_cell, infer_helper, enc_state, output_projection)
infer_output = seq2seq.dynamic_decode(infer_decoder, maximum_iterations=MAX_SEQ_LEN, swap_memory=True)

In [23]:
# Sequence mask: 1.0 for positions inside each target's true length and 0.0 for
# the padding after it, so padded positions contribute nothing to the loss.
masks = tf.sequence_mask(target_lens, output_max_len, dtype=tf.float32, name='masks')

# Loss function - weighted softmax cross entropy between the training decoder's
# logits and the target ids (SOS column skipped, cut to the decoded length).
cost = seq2seq.sequence_loss(
    outputs[0],
    target_ids[:, 1:(output_max_len + 1)],
    masks)

# Optimizer: Adam with a learning rate of 1e-4.
optimizer = tf.train.AdamOptimizer(0.0001)

In [24]:
# One optimisation step on the sequence loss.
train_op = optimizer.minimize(cost)

In [25]:
# Created after train_op so that the optimizer's own variables are initialised too.
init = tf.global_variables_initializer()

In [26]:
# Let TensorFlow grow its GPU memory allocation on demand instead of reserving
# it all up front.
sess_config = tf.ConfigProto()
sess_config.gpu_options.allow_growth = True

In [27]:
# Open the session used by every later cell and initialise all variables.
sess = tf.InteractiveSession(config=sess_config)
sess.run(init)

In [28]:
# Seed Python's `random` module so the shuffles below are repeatable.
# NOTE(review): TensorFlow's own randomness (weight init, dropout) is not
# seeded anywhere visible, so training runs are not fully reproducible.
random.seed(41)

In [29]:
# Pair each English name with its Hindi counterpart: [(en, hi), ...].
parallel = list(zip(en_sentences, hi_sentences))

In [30]:
# Shuffle in place before splitting into training and validation sets.
random.shuffle(parallel)

In [31]:
# Number of (en, hi) pairs loaded.
len(parallel)

30823

In [32]:
# Spot-check one shuffled pair.
parallel[100]

('gilli', 'गिल्ली')

In [33]:
# 95% / 5% train/validation split. Sizes come from the pairs actually loaded
# rather than the hard-coded N, so they stay correct if the corpus size changes.
train_n = int(0.95 * len(parallel))
valid_n = len(parallel) - train_n

In [34]:
# First train_n shuffled pairs are for training, the remainder for validation.
# (The slice already produces a new list; .copy() is redundant but harmless.)
train_pairs = parallel[:train_n].copy()
valid_pairs = parallel[train_n:]

In [35]:
def small_test():
    """Evaluate the model on the validation pairs and print the mean BLEU.

    The validation set is processed in full batches of BATCH_SIZE (the graph
    only accepts that batch size); a trailing partial batch is skipped. Each
    source name is decoded greedily with dropout disabled, and a smoothed
    4-gram sentence BLEU is computed between the predicted and reference
    strings, which NLTK treats as sequences of characters.

    Returns:
        The mean BLEU over all evaluated pairs, or None when there are fewer
        than BATCH_SIZE validation pairs (nothing could be evaluated).
    """
    smoothing = nltk.translate.bleu_score.SmoothingFunction().method7
    all_bleu = []

    # Only start offsets that leave room for a complete batch.
    for m in range(0, valid_n - BATCH_SIZE + 1, BATCH_SIZE):
        input_batch = np.zeros((BATCH_SIZE, MAX_SEQ_LEN), dtype=np.int32)
        input_lens_batch = np.zeros((BATCH_SIZE,), dtype=np.int32)
        for row in range(BATCH_SIZE):
            b, a = en_lang.encodeSentence2(valid_pairs[m + row][0], MAX_SEQ_LEN)
            input_batch[row, :] = a
            input_lens_batch[row] = b

        feed_dict = {
            input_ids: input_batch,
            input_lens: input_lens_batch,
            keep_prob: 1.0,  # no dropout at evaluation time
        }
        pred_batch = sess.run(infer_output[0].sample_id, feed_dict=feed_dict)

        for k, pred_ in enumerate(pred_batch):
            pred_s = hi_lang.decodeSentence(list(pred_))
            ref = valid_pairs[m + k][1]
            try:
                _bx = nltk.translate.bleu_score.sentence_bleu(
                    [ref],
                    pred_s,
                    weights=[1/4]*4,
                    smoothing_function=smoothing)
            except ZeroDivisionError:
                # Degenerate prediction; score it as a complete miss.
                _bx = 0
            all_bleu.append(_bx)

    if not all_bleu:
        # np.mean([]) would yield nan plus a RuntimeWarning.
        print("BLEU Score: n/a (fewer validation pairs than one batch)")
        return None

    mean_bleu = np.mean(all_bleu)
    print(f"BLEU Score: {mean_bleu}")
    return mean_bleu

In [36]:
def _encode_batch(lang, sentences):
    """Encode `sentences` with `lang` into (ids, lens) int32 arrays padded to MAX_SEQ_LEN."""
    ids = np.zeros((len(sentences), MAX_SEQ_LEN), dtype=np.int32)
    lens = np.zeros((len(sentences),), dtype=np.int32)
    for row, sentence in enumerate(sentences):
        length, encoded = lang.encodeSentence2(sentence, MAX_SEQ_LEN)
        ids[row, :] = encoded
        lens[row] = length
    return ids, lens


# Train for 50 epochs, reshuffling the training pairs at the start of each one.
for _e in range(50):
    # Mix things up a bit.
    random.shuffle(train_pairs)
    pbar = tqdm_notebook(range(0, train_n, BATCH_SIZE))
    batch_loss = 0
    bxi = 0
    for m in pbar:
        n = m + BATCH_SIZE
        if n > train_n:
            # The graph only accepts full batches; skip the trailing partial one.
            continue

        batch = train_pairs[m:n]
        input_batch, input_lens_batch = _encode_batch(en_lang, [pair[0] for pair in batch])
        target_batch, target_lens_batch = _encode_batch(hi_lang, [pair[1] for pair in batch])

        feed_dict = {
            input_ids: input_batch,
            input_lens: input_lens_batch,
            ph_target_ids: target_batch,
            target_lens: target_lens_batch,
            keep_prob: 0.8,
        }
        # Fetch the loss in the same run as the update: one forward pass
        # instead of two, and the reported loss belongs to the step that was
        # actually optimised rather than to a second pass with a fresh
        # dropout mask.
        _, step_loss = sess.run([train_op, cost], feed_dict=feed_dict)
        batch_loss += step_loss
        bxi += 1
        pbar.set_description(f"Epoch: {_e} >> Loss: {batch_loss/bxi:2.2F}:")
        # Validate every 100 batches.
        if (1 + n//BATCH_SIZE) % 100 == 0:
            small_test()



Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  after removing the cwd from sys.path.


  0%|          | 0/458 [00:00<?, ?it/s]

BLEU Score: 0.008200728609954218
BLEU Score: 0.04088468815787421
BLEU Score: 0.06106201276691397
BLEU Score: 0.08133513737596172


  0%|          | 0/458 [00:00<?, ?it/s]

BLEU Score: 0.10903357903108836
BLEU Score: 0.1312982937928929
BLEU Score: 0.1444172950720591
BLEU Score: 0.15586225962643377


  0%|          | 0/458 [00:00<?, ?it/s]

BLEU Score: 0.1763556629303269
BLEU Score: 0.18294895030709282
BLEU Score: 0.19156298067417432
BLEU Score: 0.19525054326965088


  0%|          | 0/458 [00:00<?, ?it/s]

BLEU Score: 0.20372553993307485
BLEU Score: 0.2078513207734843
BLEU Score: 0.21346600085546044
BLEU Score: 0.22022466155188472


  0%|          | 0/458 [00:00<?, ?it/s]

BLEU Score: 0.23006443720294778
BLEU Score: 0.23635000220221616
BLEU Score: 0.23975724491851566
BLEU Score: 0.2506271468455436


  0%|          | 0/458 [00:00<?, ?it/s]

BLEU Score: 0.2541217642684905
BLEU Score: 0.2601402838659302
BLEU Score: 0.2692922300722485
BLEU Score: 0.2743210686918724


  0%|          | 0/458 [00:00<?, ?it/s]

BLEU Score: 0.2798507489366664
BLEU Score: 0.2874930748643749
BLEU Score: 0.2988659864668804
BLEU Score: 0.3050495424117741


  0%|          | 0/458 [00:00<?, ?it/s]

BLEU Score: 0.31317660712858447
BLEU Score: 0.320946470054241
BLEU Score: 0.3267822492062264
BLEU Score: 0.34307433136825344


  0%|          | 0/458 [00:00<?, ?it/s]

BLEU Score: 0.34984471175009096
BLEU Score: 0.35451525257465205
BLEU Score: 0.3669618044361332
BLEU Score: 0.3725169418045187


  0%|          | 0/458 [00:00<?, ?it/s]

BLEU Score: 0.3908998247253053
BLEU Score: 0.39279172874705875
BLEU Score: 0.39805021093678244
BLEU Score: 0.4010345595097376


  0%|          | 0/458 [00:00<?, ?it/s]

BLEU Score: 0.4086512968083804
BLEU Score: 0.4081399278887831
BLEU Score: 0.42068436004465753
BLEU Score: 0.436613466282118


  0%|          | 0/458 [00:00<?, ?it/s]

BLEU Score: 0.4461747863291577
BLEU Score: 0.4446895612006454
BLEU Score: 0.4508197134152963
BLEU Score: 0.46370854359199054


  0%|          | 0/458 [00:00<?, ?it/s]

BLEU Score: 0.4760685512966422
BLEU Score: 0.47608952881271804
BLEU Score: 0.4798312341146021
BLEU Score: 0.48895260627747295


  0%|          | 0/458 [00:00<?, ?it/s]

BLEU Score: 0.49188628008822444
BLEU Score: 0.49779752477668765
BLEU Score: 0.5076658770073261
BLEU Score: 0.5092196545254053


  0%|          | 0/458 [00:00<?, ?it/s]

BLEU Score: 0.5128147585483416
BLEU Score: 0.5214307225476547
BLEU Score: 0.5284193194894918
BLEU Score: 0.5261565724899877


  0%|          | 0/458 [00:00<?, ?it/s]

BLEU Score: 0.5331293184485251
BLEU Score: 0.5400193251067057
BLEU Score: 0.5498426263217971
BLEU Score: 0.5532679852460013


  0%|          | 0/458 [00:00<?, ?it/s]

BLEU Score: 0.5525550773114173
BLEU Score: 0.5586823482667186
BLEU Score: 0.5608925424110449
BLEU Score: 0.5632423525341785


  0%|          | 0/458 [00:00<?, ?it/s]

BLEU Score: 0.5717146705565873
BLEU Score: 0.5723092122945815
BLEU Score: 0.5791001653491632
BLEU Score: 0.5857229292388499


  0%|          | 0/458 [00:00<?, ?it/s]

BLEU Score: 0.5851171891068586
BLEU Score: 0.590492511204772
BLEU Score: 0.5919940453314677
BLEU Score: 0.587086369265866


  0%|          | 0/458 [00:00<?, ?it/s]

BLEU Score: 0.596856824728991
BLEU Score: 0.6054152007564976
BLEU Score: 0.6138320652507496
BLEU Score: 0.611613354926631


  0%|          | 0/458 [00:00<?, ?it/s]

BLEU Score: 0.6084315471516583
BLEU Score: 0.6197646555057182
BLEU Score: 0.6271204261678746
BLEU Score: 0.6255039223880927


  0%|          | 0/458 [00:00<?, ?it/s]

BLEU Score: 0.6238518942578174
BLEU Score: 0.6281152785018105
BLEU Score: 0.63399106016549
BLEU Score: 0.6405072423962964


  0%|          | 0/458 [00:00<?, ?it/s]

BLEU Score: 0.6359387356203116
BLEU Score: 0.641030969628506
BLEU Score: 0.6409051966586713
BLEU Score: 0.6460660642797723


  0%|          | 0/458 [00:00<?, ?it/s]

BLEU Score: 0.6544017472758802
BLEU Score: 0.6571521819763019
BLEU Score: 0.6483148405328243
BLEU Score: 0.6526970672772718


  0%|          | 0/458 [00:00<?, ?it/s]

BLEU Score: 0.6552726932433285
BLEU Score: 0.6595838797445893
BLEU Score: 0.6661108948638211
BLEU Score: 0.6628713710796436


  0%|          | 0/458 [00:00<?, ?it/s]

BLEU Score: 0.6650333271259162
BLEU Score: 0.6700588749394019
BLEU Score: 0.6651228322421592
BLEU Score: 0.6656706206153326


  0%|          | 0/458 [00:00<?, ?it/s]

BLEU Score: 0.6756043030141846
BLEU Score: 0.673614197144131
BLEU Score: 0.6779539464241123
BLEU Score: 0.6774350769095331


  0%|          | 0/458 [00:00<?, ?it/s]

BLEU Score: 0.6762026594263357
BLEU Score: 0.6755847251647588
BLEU Score: 0.6817730736681821
BLEU Score: 0.683707279291907


  0%|          | 0/458 [00:00<?, ?it/s]

BLEU Score: 0.6855976938711473
BLEU Score: 0.6778843190154297
BLEU Score: 0.6856254823219187
BLEU Score: 0.6891653506123009


  0%|          | 0/458 [00:00<?, ?it/s]

BLEU Score: 0.6920900000650926
BLEU Score: 0.6935000867401526
BLEU Score: 0.6917894563024115
BLEU Score: 0.6927006552395089


  0%|          | 0/458 [00:00<?, ?it/s]

BLEU Score: 0.6942511422033381
BLEU Score: 0.6992890365794818
BLEU Score: 0.6978430282343568
BLEU Score: 0.6995105869531636


  0%|          | 0/458 [00:00<?, ?it/s]

BLEU Score: 0.6985470923713809
BLEU Score: 0.698860958715152
BLEU Score: 0.701905942503585
BLEU Score: 0.7056121961098446


  0%|          | 0/458 [00:00<?, ?it/s]

BLEU Score: 0.7022884539037016
BLEU Score: 0.7050500923947615
BLEU Score: 0.7092254125432548
BLEU Score: 0.7075032502261069


  0%|          | 0/458 [00:00<?, ?it/s]

BLEU Score: 0.7081787942634321
BLEU Score: 0.7108927445637684
BLEU Score: 0.7094418727826045
BLEU Score: 0.7077257896237213


  0%|          | 0/458 [00:00<?, ?it/s]

BLEU Score: 0.7139208147431505
BLEU Score: 0.7068871759975881
BLEU Score: 0.7141076882727712
BLEU Score: 0.7149670764124241


  0%|          | 0/458 [00:00<?, ?it/s]

BLEU Score: 0.7164453826610857
BLEU Score: 0.7183932290740686
BLEU Score: 0.7201726608747293
BLEU Score: 0.7190707767041168


  0%|          | 0/458 [00:00<?, ?it/s]

BLEU Score: 0.7197681176918468
BLEU Score: 0.7258045586801787
BLEU Score: 0.7250013258213145
BLEU Score: 0.7213019441518252


  0%|          | 0/458 [00:00<?, ?it/s]

BLEU Score: 0.726666172645302
BLEU Score: 0.7230619707237307
BLEU Score: 0.7261057848394278
BLEU Score: 0.7277090657582631


  0%|          | 0/458 [00:00<?, ?it/s]

BLEU Score: 0.7247423784070947
BLEU Score: 0.7333664296531067
BLEU Score: 0.7277687686017696
BLEU Score: 0.7322572745447017


  0%|          | 0/458 [00:00<?, ?it/s]

BLEU Score: 0.7307298174587279
BLEU Score: 0.7362567141862743
BLEU Score: 0.7260175431987131
BLEU Score: 0.728697273145527


  0%|          | 0/458 [00:00<?, ?it/s]

BLEU Score: 0.7374263133024379
BLEU Score: 0.7277300840725771
BLEU Score: 0.7400754345505174
BLEU Score: 0.7343333395794321


  0%|          | 0/458 [00:00<?, ?it/s]

BLEU Score: 0.7338776580237015
BLEU Score: 0.7340469137066071
BLEU Score: 0.7316477313379796
BLEU Score: 0.7374069132724038


  0%|          | 0/458 [00:00<?, ?it/s]

BLEU Score: 0.7333070122383414
BLEU Score: 0.7396274269456912
BLEU Score: 0.7400029937580398
BLEU Score: 0.7385854710138338


  0%|          | 0/458 [00:00<?, ?it/s]

BLEU Score: 0.7432509035143747
BLEU Score: 0.7403450966408388
BLEU Score: 0.7349094700479727
BLEU Score: 0.7381214761896532


  0%|          | 0/458 [00:00<?, ?it/s]

BLEU Score: 0.7420964287341189
BLEU Score: 0.7426699266756497
BLEU Score: 0.7407838669466787
BLEU Score: 0.7434652903550866


  0%|          | 0/458 [00:00<?, ?it/s]

BLEU Score: 0.7509009656650262
BLEU Score: 0.7429464445802684
BLEU Score: 0.740070338698947
BLEU Score: 0.7422218439598263


  0%|          | 0/458 [00:00<?, ?it/s]

BLEU Score: 0.7448459293355203
BLEU Score: 0.7507162176919634
BLEU Score: 0.7447134317665011
BLEU Score: 0.7440596168996203


  0%|          | 0/458 [00:00<?, ?it/s]

BLEU Score: 0.7426599357279254
BLEU Score: 0.7469609027901658
BLEU Score: 0.7496384502176943
BLEU Score: 0.748068339066927


  0%|          | 0/458 [00:00<?, ?it/s]

BLEU Score: 0.7495530858244536
BLEU Score: 0.747918135432346
BLEU Score: 0.7467553917560578
BLEU Score: 0.7471062027942365


  0%|          | 0/458 [00:00<?, ?it/s]

BLEU Score: 0.7531629559941875
BLEU Score: 0.7566973287487917
BLEU Score: 0.755460153648834
BLEU Score: 0.7516116068359775


In [74]:
# Persist every variable of the trained graph to a checkpoint.
# tf.global_variables() replaces the deprecated tf.all_variables().
saver = tf.train.Saver(tf.global_variables())
saver.save(sess, 'model.chkpt')

'model.chkpt'

In [82]:
def transliterate(s):
    """Transliterate one English string into Hindi with the trained model.

    The graph only accepts batches of BATCH_SIZE rows, so `s` is placed in
    row 0 of an otherwise all-zero batch (remaining rows have length 0) and
    only the first prediction is decoded. Dropout is disabled (keep_prob 1.0).

    Args:
        s: source string; it is lower-cased and cut to MAX_SEQ_LEN by the encoder.

    Returns:
        The decoded Hindi string for `s`.
    """
    encoded_len, encoded_ids = en_lang.encodeSentence2(s, MAX_SEQ_LEN)

    batch_ids = np.zeros((BATCH_SIZE, MAX_SEQ_LEN), dtype=np.int32)
    batch_lens = np.zeros((BATCH_SIZE,), dtype=np.int32)
    batch_ids[0, :] = encoded_ids
    batch_lens[0] = encoded_len

    predictions = sess.run(
        infer_output[0].sample_id,
        feed_dict={
            input_ids: batch_ids,
            input_lens: batch_lens,
            keep_prob: 1.0,
        },
    )
    return hi_lang.decodeSentence(list(predictions[0]))

In [86]:
# Spot-check the trained model on a short input.
transliterate("dev")

'देव'

In [87]:
# Spot-check the trained model on another input.
transliterate('sajal')

'सजल'

In [90]:
# Mixed-case input: the encoder lower-cases it before lookup.
transliterate('Hi')

'ही'

In [91]:
# Spot-check the trained model on another input.
transliterate('rashi')

'रशी'

In [92]:
# Spot-check the trained model on another input.
transliterate('kon')

'कों'