# Environment

In [None]:
import tensorflow as tf
import matplotlib.pyplot as plt
from matplotlib import ticker
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import LSTM, Input, Embedding, Dense
from tensorflow.keras.models import Model
from tensorflow import keras
from keras.layers.experimental.preprocessing import TextVectorization
from tensorflow.keras import layers
import pandas as pd
import unicodedata
import re
import string
import numpy as np
import os
import io
import time

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!nvidia-smi

Mon Jul  5 11:58:55 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 465.27       Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   35C    P0    26W / 250W |      0MiB / 16280MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

# Downloading and preparing the dataset


## Downloading dataset

In [None]:
!cp '/content/drive/MyDrive/Colab Notebooks/kaggle.json' /content

In [None]:
!pip install -q kaggle
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!ls ~/.kaggle
!chmod 600 /root/.kaggle/kaggle.json

kaggle.json


In [None]:
!kaggle datasets download -d aiswaryaramachandran/hindienglish-corpora

Downloading hindienglish-corpora.zip to /content
 65% 9.00M/13.9M [00:00<00:00, 17.0MB/s]
100% 13.9M/13.9M [00:00<00:00, 20.4MB/s]


In [None]:
!unzip -q hindienglish-corpora.zip -d dataset

In [None]:
df = pd.read_csv('/content/dataset/Hindi_English_Truncated_Corpus.csv')

In [None]:
df.head()

Unnamed: 0,source,english_sentence,hindi_sentence
0,ted,politicians do not have permission to do what ...,"राजनीतिज्ञों के पास जो कार्य करना चाहिए, वह कर..."
1,ted,"I'd like to tell you about one such child,",मई आपको ऐसे ही एक बच्चे के बारे में बताना चाहू...
2,indic2012,This percentage is even greater than the perce...,यह प्रतिशत भारत में हिन्दुओं प्रतिशत से अधिक है।
3,ted,what we really mean is that they're bad at not...,हम ये नहीं कहना चाहते कि वो ध्यान नहीं दे पाते
4,indic2012,.The ending portion of these Vedas is called U...,इन्हीं वेदों का अंतिम भाग उपनिषद कहलाता है।


In [None]:
pd.isnull(df).sum()

source              0
english_sentence    2
hindi_sentence      0
dtype: int64

In [None]:
df = df.dropna()

In [None]:
# Drop the corpus-origin column; only the sentence pair is used from here on.
# Rebinding with errors='ignore' (instead of inplace=True) keeps this cell
# safe to re-run after the column has already been removed.
df = df.drop(columns = 'source', errors = 'ignore')

In [None]:
# Remove rows that repeat an earlier (english, hindi) pair exactly.
df = df.drop_duplicates()

## Preprocessing Sentence  


1.   Add a start and end token to each sentence.
2.   Clean the sentences by removing special characters.
3.   Create a word index and reverse word index (dictionaries mapping from word → id and id → word).
4.   Pad each sentence to a maximum length.






In [None]:
# Lower-case both columns with the vectorised string accessor.
# Devanagari has no letter case, but the Hindi column also contains
# Latin-script words that this normalises.
df['english_sentence'] = df['english_sentence'].str.lower()
df['hindi_sentence'] = df['hindi_sentence'].str.lower()

In [None]:
# Delete apostrophes so contractions collapse into one token ("i'd" -> "id").
df['english_sentence'] = df['english_sentence'].str.replace("'", '', regex = False)
df['hindi_sentence'] = df['hindi_sentence'].str.replace("'", '', regex = False)

In [None]:
# string.punctuation only covers ASCII. The corpus also contains typographic
# quotes/dashes and the Devanagari danda ("।"), which would otherwise stay
# glued to words and create spurious vocabulary entries (e.g. "है।" vs "है").
exclude = set(string.punctuation) | set('“”‘’…–—।॥')
# A translation table removes every excluded character in one pass per sentence.
remove_punct = str.maketrans('', '', ''.join(exclude))
df['english_sentence'] = df['english_sentence'].apply(lambda x : x.translate(remove_punct))
df['hindi_sentence'] = df['hindi_sentence'].apply(lambda x : x.translate(remove_punct))
print(exclude)

{'_', '*', ':', '@', '!', '&', '[', '+', '>', '`', '~', "'", ')', '?', '/', '|', '$', '(', ';', ',', '.', '}', '-', ']', '=', '"', '<', '#', '{', '\\', '^', '%'}


[maketrans explanation](https://www.programiz.com/python-programming/methods/string/maketrans)  
[translate explanation](https://www.programiz.com/python-programming/methods/string/translate)

In [None]:
# Translation table that deletes the ASCII digits 0-9.
remove_digits = str.maketrans('', '', string.digits)
df['english_sentence'] = df['english_sentence'].str.translate(remove_digits)
# Hindi text can contain both ASCII and Devanagari digits; remove both kinds.
df['hindi_sentence'] = (
    df['hindi_sentence']
    .str.translate(remove_digits)
    .str.replace("[२३०८१५७९४६]", "", regex = True)
)
print(remove_digits)

{48: None, 49: None, 50: None, 51: None, 52: None, 53: None, 54: None, 55: None, 56: None, 57: None}


Removing leading/trailing spaces and collapsing repeated spaces

In [None]:
# Trim surrounding whitespace, then squeeze runs of spaces (left behind by the
# removed punctuation and digits) down to a single space.
df['english_sentence'] = df['english_sentence'].str.strip().str.replace(" +", " ", regex = True)
df['hindi_sentence'] = df['hindi_sentence'].str.strip().str.replace(" +", " ", regex = True)

Adding start and end tokens

In [None]:
# Wrap every target sentence in explicit start/end markers for the decoder.
df['hindi_sentence'] = "START_ " + df['hindi_sentence'] + " _END"

In [None]:
df.head(20)

Unnamed: 0,english_sentence,hindi_sentence
0,politicians do not have permission to do what ...,START_ राजनीतिज्ञों के पास जो कार्य करना चाहिए...
1,id like to tell you about one such child,START_ मई आपको ऐसे ही एक बच्चे के बारे में बता...
2,this percentage is even greater than the perce...,START_ यह प्रतिशत भारत में हिन्दुओं प्रतिशत से...
3,what we really mean is that theyre bad at not ...,START_ हम ये नहीं कहना चाहते कि वो ध्यान नहीं ...
4,the ending portion of these vedas is called up...,START_ इन्हीं वेदों का अंतिम भाग उपनिषद कहलाता...
5,the then governor of kashmir resisted transfer...,START_ कश्मीर के तत्कालीन गवर्नर ने इस हस्तांत...
6,in this lies the circumstances of people befor...,START_ इसमें तुमसे पूर्व गुज़रे हुए लोगों के ह...
7,and who are we to say even that they are wrong,START_ और हम होते कौन हैं यह कहने भी वाले कि व...
8,“”global warming“” refer to warming caused in ...,START_ ग्लोबल वॉर्मिंग से आशय हाल ही के दशकों ...
9,you may want your child to go to a school that...,START_ हो सकता है कि आप चाहते हों कि आप का नऋर...


In [None]:
# Whitespace-token count per sentence; the Hindi count includes the
# START_/_END markers added earlier.
df['len_eng_sentence'] = df['english_sentence'].str.split().str.len()
df['len_hindi_sentence'] = df['hindi_sentence'].str.split().str.len()

In [None]:
print(df.iloc[82040]['english_sentence'])
print(df.iloc[82040]['hindi_sentence'])
print(df.iloc[82040])

mumbai city is situated in the western india of konkan regionand on ulhas river
START_ मुंबई शहर भारत के पश्चिमी तट पर कोंकण तटीय क्षेत्र में उल्हास नदी के मुहाने पर स्थित है। _END
english_sentence      mumbai city is situated in the western india o...
hindi_sentence        START_ मुंबई शहर भारत के पश्चिमी तट पर कोंकण त...
len_eng_sentence                                                     14
len_hindi_sentence                                                   20
Name: 83631, dtype: object


In [None]:
print(df.shape)
print(df[df['len_eng_sentence']>30].shape)
df[df['len_hindi_sentence']>30].shape

(124825, 4)
(12111, 4)


(18757, 4)

In [None]:
# Keep only pairs where both sides are at most 30 tokens long.
lines = df[(df['len_eng_sentence'] <= 30) & (df['len_hindi_sentence'] <= 30)]

In [None]:
lines.head()

Unnamed: 0,english_sentence,hindi_sentence,len_eng_sentence,len_hindi_sentence
0,politicians do not have permission to do what ...,START_ राजनीतिज्ञों के पास जो कार्य करना चाहिए...,12,15
1,id like to tell you about one such child,START_ मई आपको ऐसे ही एक बच्चे के बारे में बता...,9,13
2,this percentage is even greater than the perce...,START_ यह प्रतिशत भारत में हिन्दुओं प्रतिशत से...,10,11
3,what we really mean is that theyre bad at not ...,START_ हम ये नहीं कहना चाहते कि वो ध्यान नहीं ...,12,13
4,the ending portion of these vedas is called up...,START_ इन्हीं वेदों का अंतिम भाग उपनिषद कहलाता...,9,10


In [None]:
lines.shape

(105194, 4)

# Getting the data ready to feed it to the transformer

In [None]:
# Build the English word -> integer-id vocabulary from the length-filtered sentences.
# NOTE(review): the tokenizer is fitted before the train/test split, so words that
# only occur in the held-out sentences are part of the vocabulary as well.
eng_tokenizer = Tokenizer()
eng_tokenizer.fit_on_texts(lines['english_sentence'])

In [None]:
# Build the Hindi word -> integer-id vocabulary. The START_/_END markers are part
# of the fitted text, so they receive ids of their own.
# NOTE(review): also fitted before the train/test split (see the split cell below).
hindi_tokenizer = Tokenizer()
hindi_tokenizer.fit_on_texts(lines['hindi_sentence'])

In [None]:
input_words = list(eng_tokenizer.word_index.keys())
target_words = list(hindi_tokenizer.word_index.keys())
# Id 0 is reserved for zero padding, so both vocabulary sizes are
# "number of words + 1" (previously only the decoder count included it).
num_encoder_tokens = len(input_words) + 1
num_decoder_tokens = len(target_words) + 1
latent_dim = 300
# Must be the last expression of the cell for the notebook to display it.
num_encoder_tokens, num_decoder_tokens

In [None]:
# Seed the shuffle so the row order, and therefore the train/test split and
# every downstream result, is reproducible across kernel restarts.
lines = shuffle(lines, random_state = 42)
lines.head(10)

Unnamed: 0,english_sentence,hindi_sentence,len_eng_sentence,len_hindi_sentence
64865,rajasthan state roadways corporationrstc opera...,START_ राजस्थान राज्य परिवहन निगम rstc की उत्त...,14,19
34836,in movies and history,START_ फिल्म एवं साहित्य में _END,4,6
65053,architect of main dome ismailak ismail khan wh...,START_ मुख्य गुम्बद का अभिकल्पक इस्माइल एकाइस्...,18,20
81240,its not really what is realistic,START_ यह सच में वास्तविक नहीं है _END,6,8
72718,getting these models to be adopted for drug di...,START_ दवाओं की खोज के लिए अपनाने की दिशा में ...,9,11
94422,after her limp suicide attempt the begum suffe...,START_ आत्महत्या का प्रयास विफल हो जाने के बाद...,23,24
118766,now what about the indus script,START_ अब सिंधु लिपि के बारे में क्या _END,6,9
127155,residents were often given less than a months ...,START_ कभीकभी तो वहां के निवासियों की बिल्डिंग...,14,24
124827,these episodes give us a glimpse of the kind o...,START_ इन घटनाओं से बसव के यहां आने वालों की ए...,17,16
58433,you can get more information from the dss website,START_ आप को इस के बारे में अधिक जानकारी ढ्श्श...,9,17


In [None]:
# Source (English) and target (Hindi) sentences as separate Series.
x = lines['english_sentence'][:]
y = lines['hindi_sentence'][:]
# Hold out 10% of the pairs for validation.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.1, random_state = 42)
x_train.shape, x_test.shape, y_train.shape, y_test.shape

((94674,), (10520,), (94674,), (10520,))

In [None]:
# Replace each training sentence with its list of integer word ids.
# NOTE: this overwrites x_train/y_train, so the cell is not safe to run twice
# without re-running the split cell first.
x_train = eng_tokenizer.texts_to_sequences(x_train)
y_train = hindi_tokenizer.texts_to_sequences(y_train)

In [None]:
# Right-pad with zeros to fixed widths. The target is one token wider (31) than
# the source (30) because it is later split into a decoder input (all but the
# last token) and a prediction target (all but the first token), each 30 wide.
x_train = pad_sequences(x_train, maxlen = 30, padding = 'post')
y_train = pad_sequences(y_train, maxlen = 31, padding = 'post')

In [None]:
# Same treatment as the training data: words -> ids, then right-pad with zeros
# to 30 (source) and 31 (target) tokens.
x_test = pad_sequences(eng_tokenizer.texts_to_sequences(x_test), maxlen = 30, padding = 'post')
y_test = pad_sequences(hindi_tokenizer.texts_to_sequences(y_test), maxlen = 31, padding = 'post')

In [None]:
# Padded widths of the target (31) and source (30) arrays.
max_len_targ, max_length_inp = y_train.shape[1], x_train.shape[1]
BUFFER_SIZE = len(x_train)  # shuffle buffer covers the whole training set
BATCH_SIZE = 128
embed_dim = 256             # width of token/position embeddings
latent_dim = 2048           # width of the feed-forward layer inside encoder/decoder
num_heads = 8
sequence_length = 30        # tokens fed to the encoder and to the decoder
# +1 because id 0 is reserved for padding.
vocab_inp_size = len(eng_tokenizer.word_index) + 1
vocab_tar_size = len(hindi_tokenizer.word_index) + 1

# Teacher forcing: the decoder input is the target without its last token and
# the label is the target without its first token (shifted by one position).
train_ds = (
    tf.data.Dataset.from_tensor_slices(((x_train, y_train[:, :-1]), y_train[:, 1:]))
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE, drop_remainder = True)
)

# Validation data needs no shuffling, and keeping the final partial batch means
# every held-out example contributes to the validation metrics.
test_ds = (
    tf.data.Dataset.from_tensor_slices(((x_test, y_test[:, :-1]), y_test[:, 1:]))
    .batch(BATCH_SIZE)
)

In [None]:
# Sanity check: print the shapes and the first example of one validation batch.
# Each element is ((encoder_inputs, decoder_inputs), targets); targets is the
# decoder input shifted left by one token.
for (enc_inputs, decoder_inputs), targets in test_ds.take(1):
    print(f'inputs["encoder_inputs"].shape: {enc_inputs.shape}')
    print(f'inputs["decoder_inputs"].shape: {decoder_inputs.shape}')
    print(f"targets.shape: {targets.shape}")
    print(f'inputs["encoder_inputs"] example: {enc_inputs[0]}')
    print(f'inputs["decoder_inputs"] example: {decoder_inputs[0]}')
    print(f"targets example: {targets[0]}")

inputs["encoder_inputs"].shape: (128, 30)
inputs["decoder_inputs"].shape: (128, 30)
targets.shape: (128, 30)
inputs["encoder_inputs"] example: [  419  4531   348  1608  4616    32  7285    27   753    18 39645    22
   419     5   419     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0]
inputs["decoder_inputs"] example: [    1  3358    85  5499  4541    17    59  4306    17    59     7 14968
    17    59     7    12     8   197  1246    38  2785 43783    11     2
     0     0     0     0     0     0]
targets example: [ 3358    85  5499  4541    17    59  4306    17    59     7 14968    17
    59     7    12     8   197  1246    38  2785 43783    11     2     0
     0     0     0     0     0     0]


# Transformer Model

In [None]:
class PositionalEmbedding(layers.Layer):
    """Sum of a token embedding and a learned position embedding.

    Args:
        seq_len: Number of rows in the position-embedding table.
        vocab_size: Number of rows in the token-embedding table.
        embed_dim: Output width of both embeddings.
        **kwargs: Forwarded to ``layers.Layer``.
    """

    def __init__(self, seq_len, vocab_size, embed_dim, **kwargs):
        super(PositionalEmbedding, self).__init__(**kwargs)
        self.emb = layers.Embedding(input_dim=vocab_size, output_dim=embed_dim)
        self.pos_emb = layers.Embedding(input_dim=seq_len, output_dim=embed_dim)

        self.seq_len = seq_len
        self.vocab_size = vocab_size
        self.embed_dim = embed_dim
    
    def call(self, inputs):
        """Embed integer token ids and add the embedding of each position index."""
        # Positions 0..length-1 are derived from the runtime size of the last axis.
        length = tf.shape(inputs)[-1]
        positions = tf.range(start=0, limit=length, delta=1)
        embed_tokens = self.emb(inputs)
        embed_pos = self.pos_emb(positions)
        # embed_pos has no batch axis; the addition broadcasts it over the batch.
        return embed_tokens + embed_pos

    def compute_mask(self, inputs, mask = None):
        """Return a boolean mask that is False wherever the token id is 0 (padding)."""
        return tf.math.not_equal(inputs, 0)

    def get_config(self):
        """Return the constructor arguments so Keras can serialise/clone the layer."""
        config = super(PositionalEmbedding, self).get_config()
        config.update({
            "seq_len": self.seq_len,
            "vocab_size": self.vocab_size,
            "embed_dim": self.embed_dim,
        })
        return config

In [None]:
class TransformerEncoder(layers.Layer):
    """Single encoder block: self-attention and a feed-forward network, each
    followed by a residual connection and layer normalisation.

    Args:
        embed_dim: Width of the block's input and output features.
        dense_dim: Hidden width of the feed-forward network.
        num_heads: Number of attention heads.
        **kwargs: Forwarded to ``layers.Layer``.
    """

    def __init__(self, embed_dim, dense_dim, num_heads, **kwargs):
        super(TransformerEncoder, self).__init__(**kwargs)
        self.embed_dim = embed_dim
        self.dense_dim = dense_dim
        self.num_heads = num_heads
        self.attention = layers.MultiHeadAttention(num_heads=num_heads,
                                                   key_dim=embed_dim)
        self.dense_proj = tf.keras.Sequential([
                                               layers.Dense(dense_dim, activation = 'relu'),
                                               layers.Dense(embed_dim)
        ])
        self.ln1 = layers.LayerNormalization()
        self.ln2 = layers.LayerNormalization()
        
    def call(self, inputs, mask = None):
        """Apply the encoder block.

        Args:
            inputs: Embedded source sequence.
            mask: Optional per-token mask (falsy entries mark padding). When it
                is None, attention runs unmasked.
        """
        # Must be defined on both paths; it used to be unbound when mask was None.
        padding_mask = None
        if mask is not None:
            # Insert two singleton axes so the per-token mask can broadcast
            # against the attention scores.
            padding_mask = tf.cast(mask[:, tf.newaxis, tf.newaxis, :], dtype='int32')
        attention_out = self.attention(
            query = inputs, value = inputs, key = inputs, attention_mask = padding_mask
        )
        proj_input = self.ln1(inputs + attention_out)
        proj_output = self.dense_proj(proj_input)
        return self.ln2(proj_input + proj_output)

    def get_config(self):
        """Return the constructor arguments so Keras can serialise/clone the layer."""
        config = super(TransformerEncoder, self).get_config()
        config.update({
            "embed_dim": self.embed_dim,
            "dense_dim": self.dense_dim,
            "num_heads": self.num_heads,
        })
        return config

In [None]:
class TransformerDecoder(layers.Layer):
    """Single decoder block: causal self-attention, attention over the encoder
    output, and a feed-forward network, each followed by a residual connection
    and layer normalisation.

    Args:
        embed_dim: Width of the block's input and output features.
        latent_dim: Hidden width of the feed-forward network.
        num_heads: Number of attention heads.
        **kwargs: Forwarded to ``layers.Layer``.
    """

    def __init__(self, embed_dim, latent_dim, num_heads, **kwargs):
        super(TransformerDecoder, self).__init__(**kwargs)
        self.embed_dim = embed_dim
        self.latent_dim = latent_dim
        self.num_heads = num_heads
        self.attention_1 = layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=embed_dim
        )
        self.attention_2 = layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=embed_dim
        )
        self.dense_proj = keras.Sequential(
            [layers.Dense(latent_dim, activation="relu"), layers.Dense(embed_dim)]
        )
        self.ln1 = layers.LayerNormalization()
        self.ln2 = layers.LayerNormalization()
        self.ln3 = layers.LayerNormalization()

    def call(self, inputs, encoder_outputs, mask=None):
        """Apply the decoder block.

        Args:
            inputs: Embedded target sequence (decoder side).
            encoder_outputs: Output of the encoder for the source sequence.
            mask: Optional per-token mask for ``inputs``. When it is None the
                encoder attention runs unmasked.
        """
        causal_mask = self.get_causal_att_mask(inputs) # (batch, seq_len, seq_len)
        # Must be defined on both paths; it used to be unbound when mask was None.
        padding_mask = None
        if mask is not None:
            padding_mask = tf.cast(mask[:, tf.newaxis, :], dtype="int32")
            # Element-wise minimum of two 0/1 masks == logical AND.
            padding_mask = tf.minimum(padding_mask, causal_mask)

        # Self-attention: position i may only look at positions <= i.
        attention_output_1 = self.attention_1(
            query=inputs, value=inputs, key=inputs, attention_mask=causal_mask
        )
        out_1 = self.ln1(inputs + attention_output_1)

        # NOTE(review): the mask applied here combines the *target* padding mask
        # with the causal mask, yet the keys are encoder positions. It only has
        # a compatible shape because source and target lengths are equal. Left
        # unchanged so that existing trained weights behave identically.
        attention_output_2 = self.attention_2(
            query=out_1,
            value=encoder_outputs,
            key=encoder_outputs,
            attention_mask=padding_mask,
        )
        out_2 = self.ln2(out_1 + attention_output_2)

        proj_output = self.dense_proj(out_2)
        return self.ln3(out_2 + proj_output)

    def get_causal_att_mask(self, inputs):
        """Return an int32 lower-triangular mask of shape (batch, seq_len, seq_len).

        Entry [b, i, j] is 1 when j <= i and 0 otherwise; batch and seq_len are
        read from the first two axes of ``inputs`` at run time.
        """
        input_shape = tf.shape(inputs)
        batch_size, seq_len = input_shape[0], input_shape[1]
        i = tf.range(seq_len)[:, tf.newaxis]
        j = tf.range(seq_len)
        mask = tf.cast(i >= j, dtype='int32')
        mask = tf.reshape(mask, (1, seq_len, seq_len))
        # Tile multiples [batch_size, 1, 1]: repeat the mask once per batch element.
        mult = tf.concat(
            [tf.expand_dims(batch_size, -1),
            tf.constant([1, 1], dtype=tf.int32)],
            axis = 0
            )
        return tf.tile(mask, mult)

    def get_config(self):
        """Return the constructor arguments so Keras can serialise/clone the layer."""
        config = super(TransformerDecoder, self).get_config()
        config.update({
            "embed_dim": self.embed_dim,
            "latent_dim": self.latent_dim,
            "num_heads": self.num_heads,
        })
        return config

In [None]:
# ---- Encoder: token ids -> contextual source representations ----
# `shape` must be a tuple; `(sequence_length)` without the comma is just an int.
encoder_inputs = tf.keras.Input(shape=(sequence_length,), dtype="int64", name="encoder_inputs") # (batch, 30)
x = PositionalEmbedding(sequence_length, vocab_inp_size, embed_dim)(encoder_inputs) # (batch, 30, embed_dim)
encoder_outputs = TransformerEncoder(embed_dim, latent_dim, num_heads)(x) # (batch, 30, embed_dim)
encoder = Model(encoder_inputs, encoder_outputs) 

# ---- Decoder: previous target tokens + encoder output -> next-token distribution ----
decoder_inputs = Input(shape=(sequence_length,), dtype="int64", name="decoder_inputs") # (batch, 30)
encoded_seq_inputs = Input(shape=(sequence_length, embed_dim), name="decoder_state_inputs") # (batch, 30, embed_dim)
# The embedding table and the softmax layer below share one vocabulary size.
x = PositionalEmbedding(sequence_length, vocab_tar_size, embed_dim)(decoder_inputs) # (batch, 30, embed_dim)
x = TransformerDecoder(embed_dim, latent_dim, num_heads)(x, encoded_seq_inputs) # (batch, 30, embed_dim)
x = layers.Dropout(0.5)(x)
decoder_outputs = layers.Dense(vocab_tar_size, activation="softmax")(x) # (batch, 30, vocab_tar_size)
decoder = Model([decoder_inputs, encoded_seq_inputs], decoder_outputs)

# ---- End-to-end model: wire the encoder output into the decoder ----
decoder_outputs = decoder([decoder_inputs, encoder_outputs])
transformer = Model(
    [encoder_inputs, decoder_inputs], decoder_outputs, name="transformer"
)
# Labels are integer token ids, hence the sparse cross-entropy.
transformer.compile("adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])

In [None]:
transformer.summary()

Model: "transformer"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
encoder_inputs (InputLayer)     [(None, 30)]         0                                            
__________________________________________________________________________________________________
positional_embedding (Positiona (None, 30, 256)      14314752    encoder_inputs[0][0]             
__________________________________________________________________________________________________
decoder_inputs (InputLayer)     [(None, 30)]         0                                            
__________________________________________________________________________________________________
transformer_encoder (Transforme (None, 30, 256)      3155456     positional_embedding[0][0]       
________________________________________________________________________________________

In [None]:
# The model is already compiled in the cell that builds it. Compiling again here
# would create a fresh optimizer, discarding its state whenever this cell is
# re-run to continue training.
history = transformer.fit(train_ds, epochs=15, validation_data=test_ds)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [None]:
transformer.save('english-to-hindi-translator-transformer')

In [None]:
transformer.save_weights('translator_weights.h5')

In [None]:
!cp -r '/content/english-to-hindi-translator-transformer' '/content/drive/MyDrive/NLP/English to Hindi machine translation/transformer/'

In [None]:
!cp '/content/translator_weights.h5' '/content/drive/MyDrive/NLP/English to Hindi machine translation'

# Inference

In [None]:
transformer.load_weights('/content/drive/MyDrive/NLP/English to Hindi machine translation/translator_weights.h5')

In [None]:
def english_vectorization(sentence, maxlen = 30):
    """Convert one English sentence into a zero-padded array of word ids.

    Args:
        sentence: Raw sentence string.
        maxlen: Width to pad to; the default matches the training inputs.

    Returns:
        Array of shape (1, maxlen), padded with zeros on the right.
    """
    sample = eng_tokenizer.texts_to_sequences([sentence])
    return pad_sequences(sample, maxlen = maxlen, padding = 'post')

def hindi_vectorization(sentence, maxlen = 31):
    """Convert one Hindi sentence into a zero-padded array of word ids.

    Args:
        sentence: Raw sentence string (may include the start/end markers).
        maxlen: Width to pad to; the default matches the training targets.

    Returns:
        Array of shape (1, maxlen), padded with zeros on the right.
    """
    sample = hindi_tokenizer.texts_to_sequences([sentence])
    return pad_sequences(sample, maxlen = maxlen, padding = 'post')

# Pick the pair whose index *label* is 1 (`lines` has been shuffled, so this is
# not the first row); .loc makes the label-based lookup explicit.
english_sample = lines['english_sentence'].loc[1]
hindi_sample = lines['hindi_sentence'].loc[1]

print(english_sample)
tokenized_sample = english_vectorization(english_sample)
print(tokenized_sample)
print('*' * 80)
print(hindi_sample)
tokenized_sample = hindi_vectorization(hindi_sample)
print(tokenized_sample)

id like to tell you about one such child
[[1185   57    5  323   16   46   36  129  268    0    0    0    0    0
     0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0]]
********************************************************************************
START_ मई आपको ऐसे ही एक बच्चे के बारे में बताना चाहूंगी _END
[[   1 1092  126  156   22   12  225    3   84    4 1428 4467    2    0
     0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0    0]]


In [None]:
hindi_vocab = target_words
# id -> word lookup. Tokenizer ids start at 1 (0 is padding), so the mapping is
# built from word_index itself rather than from enumerate-style positions.
hindi_index_lookup = {index: word for word, index in hindi_tokenizer.word_index.items()}
max_decoded_sentence_length = 30

def decode_sequence(input_sentence):
    """Greedily translate one English sentence into Hindi.

    The decoder input starts as the start marker followed by padding. At step i
    the most probable token at output position i is read and written into the
    decoder input at position i + 1. Decoding stops at the end marker, at a
    padding prediction, or after ``max_decoded_sentence_length`` steps.

    Args:
        input_sentence: English sentence, preprocessed like the training data.

    Returns:
        The decoded Hindi words joined by single spaces, without the markers.
    """
    tokenized_input_sentence = english_vectorization(input_sentence)
    # The tokenizer lower-cases and strips "_" by default, so the corpus markers
    # "START_" / "_END" are stored as "start" / "end".
    start_id = hindi_tokenizer.word_index["start"]
    end_id = hindi_tokenizer.word_index["end"]

    # Working on ids directly (instead of re-tokenizing a growing string) keeps
    # step i aligned with position i even for unusual predictions.
    decoder_tokens = np.zeros((1, max_decoded_sentence_length), dtype="int32")
    decoder_tokens[0, 0] = start_id
    decoded_words = []

    for i in range(max_decoded_sentence_length):
        predictions = transformer.predict(
            [tokenized_input_sentence, decoder_tokens], verbose=0
        )
        predicted_id = int(np.argmax(predictions[0, i, :]))
        # Id 0 is padding, i.e. "nothing more to say"; treat it like the end marker.
        if predicted_id == end_id or predicted_id == 0:
            break
        decoded_words.append(hindi_index_lookup[predicted_id])
        if i + 1 < max_decoded_sentence_length:
            decoder_tokens[0, i + 1] = predicted_id

    return " ".join(decoded_words)


In [None]:
def translate(english_sentence):
    """Decode `english_sentence` and print the input next to its Hindi translation."""
    translated = decode_sequence(english_sentence)
    print(f'English Input : {english_sentence}')
    print(f'Hindi Translation : {translated}')

In [None]:
translate(english_sample)

English Input : id like to tell you about one such child
Hindi Translation : मैं आपको एक बच्चे के बारे में बताना चाहूँगा


In [None]:
def _ids_to_hindi_text(token_ids):
    """Turn Hindi token ids into text: skip padding and the start marker, and
    stop at the first end marker (instead of slicing a fixed number of characters)."""
    start_id = hindi_tokenizer.word_index["start"]
    end_id = hindi_tokenizer.word_index["end"]
    words = []
    for token_id in token_ids:
        token_id = int(token_id)
        if token_id == end_id:
            break
        if token_id == 0 or token_id == start_id:
            continue
        words.append(hindi_tokenizer.index_word[token_id])
    return " ".join(words)

# Teacher-forced check on one validation batch: the decoder sees the true
# previous tokens, so this is easier than free-running translation.
for (enc_inps, dec_inps), outs in test_ds.take(1):
    num_examples = min(3, int(enc_inps.shape[0]))
    # One predict call for all displayed examples.
    predictions = transformer.predict(
        [enc_inps[:num_examples], dec_inps[:num_examples]], verbose=0
    )
    sampled_tokens_index = np.argmax(predictions, axis=-1)
    for example in range(num_examples):
        pred = _ids_to_hindi_text(sampled_tokens_index[example])
        targ = _ids_to_hindi_text(outs.numpy()[example])
        eng_ex = eng_tokenizer.sequences_to_texts([enc_inps.numpy()[example]])[0]

        print("Input : ", eng_ex)
        print("Target : ", targ) 
        print("Predicted : ", pred) 
        print()

Input :  at a parliamentary party meeting patil s “ impartiality ” stumped his audience ” if we want we can support poto
Target :   संसदीय दल की एक बै क में पाटील ने यह कहकर सबको चकरा दिया कि हम चाहें तो पोटो का समर्थन कर सकते हैं चाहें तो विरोध 
Predicted :  एक दल के मुय वरिष् क में यह ही भी बात कि मालूम कि कि हम अर्जेंटीना या मीड़िया से पता दें लेते हैं end end क्या

Input :  fish tenga
Target :   फिश टेंगा 
Predicted :  फिश टेंगा 

Input :  religious texts of hindu religion has been divided into two parts shruti and smriti
Target :   हिंदू धर्म के पवित्र ग्रन्थों को दो भागों में बाँटा गया है श्रुति और स्मृति। 
Predicted :  हिन्दू धर्म के अलावा ग्रन्थों को तीन भागों में बाँटा गया है। श्रुति और स्मृति। 

