In [1]:
import pandas as pd
import tokenizers
import torch
from torch import nn
import numpy as np

In [2]:
data = pd.read_table("/kaggle/input/language-translation/Sentence pairs in English-Hindi - 2025-04-02.tsv", header=None)

In [3]:

data.columns = ["Eng-ID","Eng","Hin-ID","Hin"]

In [4]:
data = data.sample(data.shape[0])

In [5]:
English_data = data.loc[:,["Eng-ID","Eng"]]

In [6]:
Hindi_data = data.loc[:,["Hin-ID","Hin"]]

In [7]:
hin_itr = Hindi_data["Hin"].to_list()
eng_itr = English_data["Eng"].to_list()

In [8]:
from tokenizers import Tokenizer

from tokenizers import normalizers
from tokenizers.normalizers import NFKC, Lowercase

from tokenizers import pre_tokenizers
from tokenizers.pre_tokenizers import Whitespace, Punctuation, Digits, BertPreTokenizer

from tokenizers.processors import TemplateProcessing

from tokenizers.models import BPE
from tokenizers.trainers import BpeTrainer

In [9]:




hin_tokenizer = Tokenizer(BPE(unk_token = "[UNK]"))

hin_tokenizer.normalizer = normalizers.Sequence([NFKC(), Lowercase()])

hin_tokenizer.pre_tokenizer = pre_tokenizers.Sequence([Whitespace(), Punctuation(), Digits(individual_digits=True)])

hin_tokenizer.post_processor = TemplateProcessing(
    single = "[SOS] $A [EOS]",
    special_tokens = [
        ("[SOS]",1),
        ("[EOS]",2),
        ("[PAD]",0)
    ]
)

trainer = BpeTrainer( special_tokens = ["[PAD]", "[SOS]" , "[EOS]", "[UNK]"])

In [10]:
hin_tokenizer.train_from_iterator(hin_itr,trainer)

hin_tokenizer.enable_padding()

In [11]:
hin_tokenizer.get_vocab_size()

10463

In [12]:
hin_encode = hin_tokenizer.encode_batch(hin_itr)

In [13]:
hin_vocab_len = hin_tokenizer.get_vocab_size()

In [14]:
eng_tokenizer = Tokenizer(BPE(unk_token = "[UNK]"))

eng_tokenizer.normalizer = normalizers.Sequence([NFKC(), Lowercase()])

eng_tokenizer.pre_tokenizer = pre_tokenizers.Sequence([Whitespace(), Punctuation(), Digits(individual_digits=True)])

eng_tokenizer.post_processor = TemplateProcessing(
    single = "[SOS] $A [EOS]",
    special_tokens = [
        ("[SOS]",1),
        ("[EOS]",2),
        ("[PAD]",0)
    ]
)

trainer = BpeTrainer( special_tokens = ["[PAD]", "[SOS]" , "[EOS]", "[UNK]"])

In [15]:
eng_tokenizer.train_from_iterator(eng_itr,trainer)
eng_tokenizer.enable_padding()

In [16]:
eng_vocab_len = eng_tokenizer.get_vocab_size()

In [17]:
eng_encode = eng_tokenizer.encode_batch(eng_itr)

In [18]:
hind_padded_size = len(hin_encode[0])
eng_padded_size = len(eng_encode[0])


In [19]:
training_size = int(data.shape[0] * 0.7)
# testing_size = data.shape[0] - training_size

In [20]:
shuffeled_index = np.random.permutation(np.arange(0,data.shape[0]))
training_eng_data = np.array(eng_encode)[shuffeled_index[:training_size]]
training_hin_data = np.array(hin_encode)[shuffeled_index[:training_size]]


testing_eng_data = np.array(eng_encode)[shuffeled_index[training_size:]]
testing_hin_data = np.array(hin_encode)[shuffeled_index[training_size:]]

In [21]:
eng_tokenizer.decode_batch([x.ids for x in testing_eng_data[:10]])

['is that a cat ?',
 "they ' re using you .",
 'he likes playing soccer .',
 'they dug here and there for treasure .',
 "you ' re doing very well . keep it up .",
 'she has lived there for seven years .',
 "you often ask questions i can ' t answer .",
 'we were worried about you .',
 'let me know if you find anything .',
 'just wait a minute .']

In [22]:
hin_tokenizer.decode_batch([x.ids for x in testing_hin_data[:10]])

['वह बिल्ली है क्या ?',
 'वे तुम्हारा इस्तेमाल कर रहे हैं ।',
 'उसको फ़ुटबॉल खेलना अच्छा लगता है ।',
 'उन्होंने ख़ज़ाना ढूँढने के लिए यहाँ - वहाँ खोदा ।',
 'तुम अच्छा काम कर रहे हो । ऐसे ही करते रहो ।',
 'वह वहाँ सात साल रही है ।',
 'आप अकसर ऐसे सवाल पूछते हैं जिनका मैं जवाब नहीं दे सकती ।',
 'हमें आपकी चिंता लगी हुई है ।',
 'कुछ मिल जाये तो मुझे बता देना ।',
 'बस एक मिनट रुक ।']

In [23]:
# Class to create a encoder which will take input sequence and give the hidden states at each time step

class encoder(nn.Module):
    def __init__(self):        
        super().__init__()
        self.embedding_layer = nn.Embedding(eng_vocab_len, 620)
        self.rnn1 = nn.LSTM(620,1000, batch_first =True, bidirectional = True)
        self.dropout = nn.Dropout(p = 0.3) 

    def forward(self,input_batch):
        x = self.embedding_layer(input_batch)
        x, (h,c) = self.rnn1(x)
        x = self.dropout(x)
        return (x, (h,c))

In [24]:
# Class to calculate the context vector for the current time step by taking the hidden state of the decoder and all the hidden states given by the 
# encoder

class attention(nn.Module):
    def __init__(self):
        super().__init__()
        self.dense_1 = nn.Linear(2000,1000)
        self.dense_2 = nn.Linear(1000,1000)
        self.dense_3 = nn.Linear(1000,1)

    def forward(self, decoder_hidden_state, encoder_hidden_state):

        x = self.dense_1(encoder_hidden_state)
        y = self.dense_2(decoder_hidden_state).permute(1,0,2)
        x = torch.add(x,y)
        x = nn.functional.tanh(x)
        x = self.dense_3(x)
        x = nn.functional.softmax(x, dim = 1)
        x  = torch.sum(x * encoder_hidden_state, dim = 1)

        return x

In [25]:
# This is the maxout layer explained in the paper and it will project the data in two different vector spaces of same dimension and 
# then calculate the maximum value of a element from both the dimensions.

class maxout(nn.Module):
    def __init__(self):
        super().__init__()
        self.Dense_1 = nn.Linear(1000,hin_vocab_len)
        self.Dense_2 = nn.Linear(1000,hin_vocab_len)

    def forward(self,x):
        lp_1 = self.Dense_1(x)
        lp_2 = self.Dense_2(x)
        lp_1 = torch.cat([lp_1,lp_2], dim = 1)
        lp_1 = lp_1.max(dim = 1)[0].unsqueeze(1)
        return lp_1

In [26]:
# Class to create a decoder which will take previous prediction, hidden states given by encoder at each time step,
# it's own previous hidden states and cell states

class decoder(nn.Module):
    def __init__(self):
        super().__init__()
        self.embedding = nn.Embedding(hin_vocab_len,620, padding_idx=0)
        self.norm_1 = nn.LayerNorm(620 + 2000)
        self.rnn = nn.LSTM(620 + 2000,1000,batch_first =True)
        self.norm_2 = nn.LayerNorm(1000)
        self.dropout= nn.Dropout(p = 0.3)
        # self.dense = nn.Linear(800,hin_vocab_len)
        self.maxout = maxout()
        self.attention = attention()


    def forward(self, y_last_pred, encoder_hidden_states, decoder_hidden_state, cell_hidden_state):

        y_last_pred = self.embedding(y_last_pred)
        context_vector = self.attention(decoder_hidden_state, encoder_hidden_states).unsqueeze(1)
        y_last_pred = torch.cat((context_vector,y_last_pred),dim=2)
        y_last_pred = self.norm_1(y_last_pred)
        x , (hidden, cell) = self.rnn(y_last_pred,(decoder_hidden_state,cell_hidden_state))
        x = self.norm_2(x)
        x = self.dropout(x)
        # x = self.dense(x)
        x = self.maxout(x)
        return x, hidden,cell
        

In [27]:
e = encoder().cuda()
d = decoder().cuda()

In [28]:
loss = nn.CrossEntropyLoss(ignore_index = 0 ).cuda()

In [30]:
encoder_bias_parm = []
encoder_thetha = []
for name, weight in list(e.named_parameters()):
    if "bias" in name:
        encoder_bias_parm.append(weight)

    else:
        encoder_thetha.append(weight)


decoder_bias_parm = []
decoder_theta = []
for name, weight in list(d.named_parameters()):
    if "bias" in name:
        decoder_bias_parm.append(weight)

    else:
        decoder_theta.append(weight)

In [32]:
# Applying regularization only on parameter vectors not on the bias terms

optim = torch.optim.Adam([{"params":encoder_bias_parm + decoder_bias_parm, "weight_decay":0},
                        {"params": encoder_thetha + decoder_theta}], weight_decay = 0.005, lr = 0.001)

In [33]:
# Main training loop

def lang_training(orignal_sequence, target_sequence,val_orignal_sequence, val_target_sequence, batch_size, epochs, encoder, decoder, target_vocab_len,):

    
    total_sequence = (len(orignal_sequence)//batch_size) * batch_size
    taget_sequence_length = torch.tensor(orignal_sequence[0].ids).shape[0]
    average_training_loss = {}
    orignal_sequence = np.array(orignal_sequence)
    target_sequence = np.array(target_sequence)
    
    for epoch in range(1,epochs):
        encoder.train()
        decoder.train()
        random_index = np.random.permutation(np.arange(0,orignal_sequence.shape[0]))
        orignal_sequence = orignal_sequence[random_index]
        target_sequence = target_sequence[random_index]
        loss_per_batch = []
        with tqdm(range(0,total_sequence, batch_size)) as tq:
            for time_step, idx in enumerate(tq):
        
                
                # Collecting inputs and targets.
                input_ = torch.tensor([x.ids for x in orignal_sequence[idx:idx + batch_size]]).cuda()                
                output_ = torch.tensor([x.ids for x in target_sequence[idx:idx + batch_size]]).cuda()
        
                # Extracting all the hidden states and the cell states from the encoder, using the [SOS] token for a batch.
                en_out = encoder.forward(input_)
                
                hidden = en_out[1][0][1].unsqueeze(0) 
                cell = en_out[1][1][1].unsqueeze(0)
    
                en_out = en_out[0]
            
                # Assigning 2 np arrays for storing the probabilites of each token and the target token respectively in each timestep.
                y_p_loss = torch.zeros(68,batch_size,target_vocab_len).cuda()
                y_t_loss = torch.zeros(68,batch_size).cuda()
        
            
                # Using the final hidden state of the encoder as the initial hidden state of the decoder and generating the y_1_hat 
                y_last_pred, hidden, cell = decoder.forward(output_[:,0].unsqueeze(1), en_out, hidden, cell)
         
                y_true = output_[:,1] # Loss will be calculated on target[t+1] index
                l = 0
        
                y_p_loss[0] = y_last_pred.squeeze(1)
                y_t_loss[0] = y_true
                
                # Finding the index of the token which have the max probability
                y_last_pred = y_last_pred.argmax(dim = 2)
                
        
                # Generating the probabilites of the tokens for further timesteps and storing them in the assigined variables
                for i in range(1,68):
                    
                    y_last_pred,  hidden, cell = decoder.forward(y_last_pred, en_out, hidden, cell)
                    y_true = output_[:,i+1]            
                    y_p_loss[i] = y_last_pred.squeeze(1)
                    y_t_loss[i] = y_true
                    
                    y_last_pred = y_last_pred.argmax(dim = 2)
        
                # Reshaping the tensors to match the requirement of the CCE loss -> (Batchsize * sequence, classes)
                y_p_loss=y_p_loss.view(-1, target_vocab_len)
                y_t_loss=y_t_loss.view(-1).type(torch.long)
            
                l = loss(y_p_loss, y_t_loss)
                optim.zero_grad()
                l.backward()
                optim.step()
                loss_per_batch.append(l.detach().cpu())
                tq.set_postfix({"Loss":torch.tensor(loss_per_batch).mean()})

        # average_training_loss[epoch] = torch.tensor(loss_per_batch).mean()
        # print(f"Epoch {epoch}           training_loss {average_training_loss[epoch]} ")
        
        val_loss = validation_performance(val_orignal_sequence, val_target_sequence, 50, e,d, target_vocab_len)
        print(f"Epoch {epoch}           val_loss {val_loss}")
    return average_training_loss

In [35]:
# Slight modification of main training loop to work on validation data

def validation_performance(orignal_sequence,target_sequence,batch_size,encoder,decoder,target_vocab_len):
    encoder.eval()
    decoder.eval()
    total_sequence = (len(orignal_sequence)//batch_size) * batch_size
    taget_sequence_length = torch.tensor(orignal_sequence[0].ids).shape[0]
    loss_per_batch = []
    with torch.no_grad():
        for time_step, idx in enumerate(range(0,total_sequence, batch_size)):
        
            # Collecting inputs and targets.
            input_ = torch.tensor([x.ids for x in orignal_sequence[idx:idx + batch_size]]).cuda()                
            output_ = torch.tensor([x.ids for x in target_sequence[idx:idx + batch_size]]).cuda()
        
            # Extracting all the hidden states and the cell states from the encoder, using the [SOS] token for a batch.
            en_out = encoder.forward(input_)
            
            hidden = en_out[1][0][1].unsqueeze(0) 
            cell = en_out[1][1][1].unsqueeze(0)
        
            en_out = en_out[0]
        
            # Assigning 2 np arrays for storing the probabilites of each token and the target token respectively in each timestep.
            y_p_loss = torch.zeros(68,batch_size,target_vocab_len).cuda()
            y_t_loss = torch.zeros(68,batch_size).cuda()
        
        
            # Using the final hidden state of the encoder as the initial hidden state of the decoder and generating the y_1_hat 
            y_last_pred, hidden, cell = decoder.forward(output_[:,0].unsqueeze(1), en_out, hidden, cell)
        
            y_true = output_[:,1] # Loss will be calculated on target[t+1] index
            l = 0
            y_p_loss[0] = y_last_pred.squeeze(1)
            y_t_loss[0] = y_true
            
            # Finding the index of the token which have the max probability
            y_last_pred = y_last_pred.argmax(dim = 2)
            
        
            # Generating the probabilites of the tokens for further timesteps and storing them in the assigined variables
            for i in range(1,68):
                
                y_last_pred,  hidden, cell = decoder.forward(y_last_pred, en_out, hidden, cell)
                y_true = output_[:,i+1]            
                y_p_loss[i] = y_last_pred.squeeze(1)
                y_t_loss[i] = y_true
                
                y_last_pred = y_last_pred.argmax(dim = 2)
        
            # Reshaping the tensors to match the requirement of the CCE loss -> (Batchsize * sequence, classes)
            y_p_loss=y_p_loss.view(-1, target_vocab_len)
            y_t_loss=y_t_loss.view(-1).type(torch.long)
        
            l = loss(y_p_loss, y_t_loss)
            loss_per_batch.append(l.detach().cpu())

    return torch.tensor(loss_per_batch).mean()

In [36]:

lang_training(eng_encode[:-1500], hin_encode[:-1500],eng_encode[-1500:],hin_encode[-1500:], 180, 30, e,d, hin_vocab_len)

100%|██████████| 64/64 [02:57<00:00,  2.77s/it, Loss=tensor(5.6287)]


Epoch 1           val_loss 4.960709095001221


100%|██████████| 64/64 [02:56<00:00,  2.76s/it, Loss=tensor(4.7877)]


Epoch 2           val_loss 4.5693159103393555


100%|██████████| 64/64 [02:56<00:00,  2.76s/it, Loss=tensor(4.4330)]


Epoch 3           val_loss 4.369600296020508


100%|██████████| 64/64 [02:56<00:00,  2.76s/it, Loss=tensor(4.2107)]


Epoch 4           val_loss 4.193140029907227


100%|██████████| 64/64 [02:56<00:00,  2.76s/it, Loss=tensor(4.0409)]


Epoch 5           val_loss 4.084902763366699


100%|██████████| 64/64 [02:56<00:00,  2.76s/it, Loss=tensor(3.9187)]


Epoch 6           val_loss 3.9940576553344727


100%|██████████| 64/64 [02:56<00:00,  2.76s/it, Loss=tensor(3.8331)]


Epoch 7           val_loss 3.947662830352783


100%|██████████| 64/64 [02:56<00:00,  2.76s/it, Loss=tensor(3.7508)]


Epoch 8           val_loss 3.913719415664673


100%|██████████| 64/64 [02:56<00:00,  2.76s/it, Loss=tensor(3.6992)]


Epoch 9           val_loss 3.880254030227661


100%|██████████| 64/64 [02:56<00:00,  2.76s/it, Loss=tensor(3.6412)]


Epoch 10           val_loss 3.8507142066955566


100%|██████████| 64/64 [02:56<00:00,  2.76s/it, Loss=tensor(3.6062)]


Epoch 11           val_loss 3.8096423149108887


100%|██████████| 64/64 [02:56<00:00,  2.76s/it, Loss=tensor(3.5552)]


Epoch 12           val_loss 3.76468563079834


100%|██████████| 64/64 [02:56<00:00,  2.76s/it, Loss=tensor(3.5162)]


Epoch 13           val_loss 3.7488999366760254


100%|██████████| 64/64 [02:56<00:00,  2.76s/it, Loss=tensor(3.4952)]


Epoch 14           val_loss 3.742936134338379


100%|██████████| 64/64 [02:56<00:00,  2.76s/it, Loss=tensor(3.4709)]


Epoch 15           val_loss 3.724591016769409


100%|██████████| 64/64 [02:56<00:00,  2.76s/it, Loss=tensor(3.4475)]


Epoch 16           val_loss 3.7111124992370605


100%|██████████| 64/64 [02:56<00:00,  2.76s/it, Loss=tensor(3.4274)]


Epoch 17           val_loss 3.7024776935577393


100%|██████████| 64/64 [02:56<00:00,  2.76s/it, Loss=tensor(3.4041)]


Epoch 18           val_loss 3.708263635635376


100%|██████████| 64/64 [02:56<00:00,  2.76s/it, Loss=tensor(3.3954)]


Epoch 19           val_loss 3.693119764328003


100%|██████████| 64/64 [02:56<00:00,  2.76s/it, Loss=tensor(3.3886)]


Epoch 20           val_loss 3.6809158325195312


100%|██████████| 64/64 [02:56<00:00,  2.76s/it, Loss=tensor(3.3804)]


Epoch 21           val_loss 3.6841273307800293


100%|██████████| 64/64 [02:56<00:00,  2.76s/it, Loss=tensor(3.3633)]


Epoch 22           val_loss 3.682408332824707


100%|██████████| 64/64 [02:56<00:00,  2.76s/it, Loss=tensor(3.3646)]


Epoch 23           val_loss 3.6708970069885254


100%|██████████| 64/64 [02:56<00:00,  2.76s/it, Loss=tensor(3.3529)]


Epoch 24           val_loss 3.6848747730255127


100%|██████████| 64/64 [02:56<00:00,  2.76s/it, Loss=tensor(3.3410)]


Epoch 25           val_loss 3.6840949058532715


100%|██████████| 64/64 [02:56<00:00,  2.76s/it, Loss=tensor(3.3383)]


Epoch 26           val_loss 3.6829986572265625


100%|██████████| 64/64 [02:56<00:00,  2.76s/it, Loss=tensor(3.3329)]


Epoch 27           val_loss 3.662109851837158


100%|██████████| 64/64 [02:56<00:00,  2.76s/it, Loss=tensor(3.3221)]


Epoch 28           val_loss 3.6719136238098145


100%|██████████| 64/64 [02:56<00:00,  2.76s/it, Loss=tensor(3.3117)]


Epoch 29           val_loss 3.6705706119537354


In [None]:
np.array(eng_encode[:-500]).shape[0]

In [37]:
input_ = torch.tensor([x.ids for x in eng_encode[10:30]]).cuda()
output_ = torch.tensor([x.ids for x in hin_encode[10:30]]).cuda()

In [38]:
torch.cuda.empty_cache()

In [39]:
input_.shape

torch.Size([20, 66])

In [40]:
en = e.forward(input_)

In [41]:
hidden = en[1][0][1].unsqueeze(0)
cell = en[1][1][1].unsqueeze(0)
en = en[0]

In [42]:
hidden.shape

torch.Size([1, 20, 1000])

In [43]:
y_last_pred,hidden, cell = d.forward(output_[:,0].unsqueeze(1), en, hidden,cell)
y_last_pred = y_last_pred.argmax(dim = 2)

In [44]:
a = torch.zeros((69,20,1))

In [45]:
a[0] = y_last_pred

In [46]:
for i in range(1,69):
    y_last_pred,  hidden, cell = d.forward(y_last_pred, en, hidden, cell)
    y_last_pred = y_last_pred.argmax(dim = 2)
    a[i] = y_last_pred

In [47]:
a = a.permute(1,0,2)

In [48]:
a = a.view(20,-1).type(torch.int32)

In [49]:
hin_tokenizer.decode_batch(a.detach().cpu().tolist())

['एक एक एक एक है है है',
 'उसने ने ने उसे की की । ।',
 'मैं मेरे काम से काम काम काम । ।',
 'मैंने मेरे कल मदद मदद मदद । ।',
 'अभी अभी तक तक तक तक है ?',
 'इस अपना अपना अपना । ।',
 'तुम मैं शादी शादी शादी शादी शादी शादी शादी शादी शादी । ।',
 'मैं हूँ ।',
 'मुझे उसके रात रात साथ साथ । ।',
 'हम वाले वाले वाले ।',
 "' ' ' ' ' ' ।",
 'क्या ! !',
 'मैं कोई नहीं नहीं नहीं नहीं ।',
 'वह वह से था !',
 'तुम बिलकुल नहीं नहीं नहीं ।',
 'वह मेरा दोस्त है ।',
 'तुम मेरे सहायक हो ।',
 'हम अपना काम काम हैं ।',
 'क्या है है ? ?',
 'क्या चाय चाय चाय ? ? ?']

In [50]:
hin_tokenizer.decode_batch(output_.detach().cpu().tolist())

['देश एक खतरनाक मशीन होती है ।',
 'भूख ने उसे चोरी करने पर मजबूर किया ।',
 'मैं अपने काम से ध्यान हटाना चाहता हूँ ।',
 'कल मैंने मेरे पिता की मदद की ।',
 'बारिश अभी तक रुकी नहीं है , है ना ?',
 'अपना मूँह इस तरफ़ मोड़ो ।',
 'तुम जो भी कहो , मैं तो उससे ही शादी करूँगा ।',
 'मैं ऊँची हूँ ।',
 'मेरी उसके साथ आज रात डेट है ।',
 'हम बात करने वाले हैं ।',
 "' chat ' इस फ्रांसीसी शब्द का मतलब है ' बिल्ली ' ।",
 'लानत है !',
 'मैं नहीं चाहता कि कोई भी ग़लतफ़ैमी हो ।',
 'वह एक अनुकरणीय प्रदर्शन था !',
 'तुम बिलकुल भी बेवकूफ नहीं हो ।',
 'वह मेरा दोस्त है ।',
 'आप मेरे नये सहायक हैं ।',
 'हम तुम्हारा काम बाँट रहें हैं ।',
 'लिफ़्ट किस तरफ़ है ?',
 'थोड़ी सी चाय बनाऊं ?']

In [49]:
eng_tokenizer.decode_batch(input_.detach().cpu().tolist())

['stay inside .',
 'bet ?',
 'this is my japanese friend .',
 "why isn ' t it here ?",
 'tom never tells us anything .',
 'zulfiqar was the famous sword of hazret - i ali , fourth caliph of islam .',
 'does arnold schwarzenegger still know german ?',
 'he watered the rose bush .',
 "who ' s helping you ?",
 'write your name with the pencil .',
 "i don ' t exist to you .",
 'tom most probably forgot .',
 "tom pulled mary ' s hair .",
 'i am afraid of death .',
 'my mother is a lawyer .',
 "i ' d caught him red - handed .",
 'i found him .',
 "don ' t expect anything original from an echo .",
 "you wouldn ' t understand .",
 "i ' ve been studying uighur for two years now ."]

In [46]:
hin_tokenizer.decode_batch(a.detach().cpu().tolist())

['अपने पुत्र की प्रशंसा की .',
 'ग़लती करने से डरता है ।',
 'वैसे कहने का कोई अधिकार नहीं है ।',
 'कीजिएगा , पर मैं आपको ठीक से सुन नहीं पा रही हूँ ।',
 'मेरे नाना हैं ।',
 'मैं आपको कुछ दिखा सकता हूं ?',
 'यहाँ उतरना चाहता हूँ .',
 'अमीर नहीं हैं ।',
 'लौटने पर क्या तुम यहाँ होगे ?',
 'अपने पैसे वापस चाहिए ।',
 'आपकी कहानियों पर विश्वास नहीं है ।',
 'में तुम कहां बड़ी हुए ?',
 'ग्राहक ताइवान में हैं , टॉम ने समझाया ।',
 'है वे हमें जानते हैं ।',
 'कुछ ज़्यादा ही शोर कर रहे हैं ।',
 'अपनी मां से प्यार करता हूं ।',
 'सब एक - जैसे होते हैं !',
 'कुछ जाये जाये मुझे बता देना ।',
 'गुदा मैथुन बहुत पसंद है ।',
 'डाक्टर हो , ना ?']

In [47]:
eng_tokenizer.decode_batch(input_.detach().cpu().tolist())

['what did he tell you ?',
 'i wanted tom to study harder .',
 'is this your child ?',
 "this is my father ' s house .",
 'i am dead to you .',
 'is it possible to borrow money ?',
 'she is senior to me by three years .',
 'ken beat me at chess .',
 'what language is spoken in mexico ?',
 'here they are !',
 'begin !',
 'you know tom .',
 "it ' ll only take a minute .",
 'i am learning for you .',
 'he cannot buy a car , still less a house .',
 "you want answers , don ' t you ?",
 'get lost !',
 'my parents have just arrived at the station .',
 'you are saying you intentionally hide your good looks ?',
 'he never takes into account the fact that i am very busy .']

In [552]:
q = eng_tokenizer.encode_batch(["she is going to school by my car"])

In [553]:
eng_tokenizer.decode(q[0].ids)

'she is going to school by my car'

In [554]:
len(q[0].ids)

66

In [555]:
input_ = torch.tensor([x.ids for x in q[:1]]).cuda()
# output_ = torch.tensor([x.ids for x in hin_encode[10:30]]).cuda()

In [556]:
en = e.forward(input_)

In [557]:
hidden = en[1][0][1].unsqueeze(0)
cell = en[1][1][1].unsqueeze(0)
en = en[0]

In [558]:
hidden.shape

torch.Size([1, 1, 1000])

In [559]:
y_last_pred,hidden, cell = d.forward(output_[:1,0].unsqueeze(1), en, hidden,cell)
y_last_pred = y_last_pred.argmax(dim = 2)

In [560]:
y_last_pred

tensor([[167]], device='cuda:0')

In [561]:
a = torch.zeros((69,1,1))

In [562]:
for i in range(0,69):
    y_last_pred,  hidden, cell = d.forward(y_last_pred, en, hidden, cell)
    y_last_pred = y_last_pred.argmax(dim = 2)
    a[i] = y_last_pred

In [563]:
a = a.permute(1,0,2)

In [564]:
a.shape

torch.Size([1, 69, 1])

In [565]:
a = a.view(1,-1)

In [566]:
a = a.type(torch.int32)

In [567]:
hin_tokenizer.decode_batch(a.detach().cpu().tolist())

['वह मेरी स्कूल से जा रही है ।']

In [535]:
a

tensor([[2481,   10,  172,   10,  172,  156,   10,  156,  150,   21,  131,   21,
            2,    2,    2,    2,    2,    2,    2,    2,    2,    2,    2,    2,
            2,    2,    2,    2,    2,    2,    2,    2,    2,    2,    2,    2,
            2,    2,    2,    2,    2,    2,    2,    2,    2,    2,    2,    2,
            2,    2,    2,    2,    2,    2,    2,    2,    2,    2,    2,    2,
            2,    2,    2,    2,    2,    2,    2,    2,    2]],
       dtype=torch.int32)