## A Bhagwad Gita verse translator and analyser, based on what I learned in FastAI Lesson 4 (NLP)

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline
sns.set_style("whitegrid")
plt.style.use("fivethirtyeight")

In [None]:
# Load the verse table from CSV. The path is absolute, so the file has to exist
# at exactly this location before the cell is run.
dataset = pd.read_csv("/content/Bhagwad_Gita.csv")
# Preview the first rows to check the columns that were read.
dataset.head()

Unnamed: 0,ID,Chapter,Verse,Shloka,Transliteration,HinMeaning,EngMeaning,WordMeaning
0,BG1.1,1,1,धृतराष्ट्र उवाच |\nधर्मक्षेत्रे कुरुक्षेत्रे स...,dhṛtarāṣṭra uvāca .\ndharmakṣetre kurukṣetre s...,।।1.1।।धृतराष्ट्र ने कहा -- हे संजय ! धर्मभूमि...,1.1 Dhritarashtra said What did my people and...,1.1 धर्मक्षेत्रे on the holy plain? कुरुक्षेत्...
1,BG1.2,1,2,सञ्जय उवाच |\nदृष्ट्वा तु पाण्डवानीकं व्यूढं द...,sañjaya uvāca .\ndṛṣṭvā tu pāṇḍavānīkaṃ vyūḍha...,।।1.2।।संजय ने कहा -- पाण्डव-सैन्य की व्यूह रच...,1.2. Sanjaya said Having seen the army of the...,1.2 दृष्ट्वा having seen? तु indeed? पाण्डवानी...
2,BG1.3,1,3,पश्यैतां पाण्डुपुत्राणामाचार्य महतीं चमूम् |\n...,paśyaitāṃ pāṇḍuputrāṇāmācārya mahatīṃ camūm .\...,।।1.3।।हे आचार्य ! आपके बुद्धिमान शिष्य द्रुपद...,"1.3. ""Behold, O Teacher! this mighty army of t...",1.3 पश्य behold? एताम् this? पाण्डुपुत्राणाम् ...
3,BG1.4,1,4,अत्र शूरा महेष्वासा भीमार्जुनसमा युधि |\nयुयुध...,atra śūrā maheṣvāsā bhīmārjunasamā yudhi .\nyu...,।।1.4।।इस सेना में महान् धनुर्धारी शूर योद्धा ...,"1.4. Here are heroes, mighty archers, eal in b...",1.4 अत्र here? शूराः heroes? महेष्वासाः mighty...
4,BG1.5,1,5,धृष्टकेतुश्चेकितानः काशिराजश्च वीर्यवान् |\nपु...,dhṛṣṭaketuścekitānaḥ kāśirājaśca vīryavān .\np...,"।।1.5।।धृष्टकेतु, चेकितान, बलवान काशिराज, पुर...","1.5. ""Dhrishtaketu, chekitana and the valiant ...",1.5 धृष्टकेतुः Dhrishtaketu? चेकितानः Chekitan...


In [None]:
# Train/test split: the 'Shloka' column is the source text, 'EngMeaning' the target text.
from sklearn.model_selection import train_test_split
source = dataset['Shloka']
target = dataset['EngMeaning']

# Hold out 20% of the rows; the fixed random_state keeps the split repeatable across runs.
source_train, source_test, target_train, target_test = train_test_split(source, target, test_size=0.2, random_state=42)

# Print the first rows of both training Series to check that their indices line up.
print(source_train.head())
print(target_train.head())

165    अर्जुन उवाच |\nअपरं भवतो जन्म परं जन्म विवस्वत...
542    नान्यं गुणेभ्यः कर्तारं यदा द्रष्टानुपश्यति |\...
284    अपरेयमितस्त्वन्यां प्रकृतिं विद्धि मे पराम् |\...
360    येऽप्यन्यदेवता भक्ता यजन्ते श्रद्धयान्विताः |\...
110    रागद्वेषविमुक्तैस्तु विषयानिन्द्रियैश्चरन् | (...
Name: Shloka, dtype: object
165    4.4 Arjuna said  Later on was Thy birth, and p...
542    14.19 When the seer beholds no agent other tha...
284    7.5 This is the inferior Prakriti, O mighty-ar...
360    9.23 Even those devotees who, endowed with fai...
110    2.64 But the self-controlled man, moving among...
Name: EngMeaning, dtype: object


In [None]:
from torchtext.data.utils import get_tokenizer
from collections import Counter
from torchtext.vocab import vocab
import torch.nn as nn
from torch.utils.data import DataLoader
import spacy
from spacy.lang.hi.examples import sentences
from torch.nn.utils.rnn import pad_sequence
from torch.nn import (TransformerEncoder, TransformerDecoder, TransformerEncoderLayer, TransformerDecoderLayer)



### Tokenization:



In [None]:
# English tokenizer backed by spaCy, using the 'en_core_web_sm' pipeline
# (that spaCy model has to be installed in the environment).
en_tokenizer = get_tokenizer('spacy', language = 'en_core_web_sm')

In [None]:
! pip install indic-nlp-library

Collecting indic-nlp-library
  Downloading indic_nlp_library-0.92-py3-none-any.whl (40 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.3/40.3 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting sphinx-argparse (from indic-nlp-library)
  Downloading sphinx_argparse-0.4.0-py3-none-any.whl (12 kB)
Collecting sphinx-rtd-theme (from indic-nlp-library)
  Downloading sphinx_rtd_theme-2.0.0-py2.py3-none-any.whl (2.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.8/2.8 MB[0m [31m12.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting morfessor (from indic-nlp-library)
  Downloading Morfessor-2.0.6-py3-none-any.whl (35 kB)
Collecting sphinxcontrib-jquery<5,>=4 (from sphinx-rtd-theme->indic-nlp-library)
  Downloading sphinxcontrib_jquery-4.1-py2.py3-none-any.whl (121 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m121.1/121.1 kB[0m [31m17.9 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: morfessor, sphinxcon

In [None]:
from indicnlp.tokenize import indic_tokenize
from indicnlp.normalize.indic_normalize import IndicNormalizerFactory

def sa_tokenizer(data):
  """Normalize a Devanagari string and split it into a list of tokens.

  Args:
    data: the raw text of one verse (may contain line breaks).

  Returns:
    list of str: the tokens produced by ``indic_tokenize.trivial_tokenize``
    on the normalized text, with empty strings removed.
  """
  factory=IndicNormalizerFactory()
  normalizer=factory.get_normalizer('sa',remove_nuktas = True)
  text = normalizer.normalize(data)
  # Line breaks inside a verse otherwise stay glued to the following word
  # (yielding tokens such as '\nअपरं'), which splits one word into two distinct
  # vocabulary entries. Collapse every whitespace run to a single space first.
  text = ' '.join(text.split())
  words = indic_tokenize.trivial_tokenize(text)
  # An empty or whitespace-only input would otherwise produce a single '' token.
  return [word for word in words if word]

In [None]:
# Quick check of the tokenizer on a short fragment that contains a line break.
sample_text = "अर्जुन उवाच |\nअपरं भवतो जन्म परं जन्म विवस्वत"

sa_tokenizer(sample_text)

['अर्जुन', 'उवाच', '।', '\nअपरं', 'भवतो', 'जन्म', 'परं', 'जन्म', 'विवस्वत']

In [None]:
def build_vocab(filedata, tokenizer):
    """Count the tokens of every string in ``filedata`` and wrap them in a vocab.

    Args:
        filedata: iterable of strings to tokenize.
        tokenizer: callable mapping one string to an iterable of tokens.

    Returns:
        The object returned by ``vocab(...)`` for the token counts, created
        with the specials '<unk>', '<pad>', '<bos>' and '<eos>'.

    Side effects:
        Prints the full token counter.
    """
    token_counts = Counter(
        token
        for text in filedata
        for token in tokenizer(text)
    )
    print(token_counts)
    special_tokens = ['<unk>', '<pad>', '<bos>', '<eos>']
    return vocab(token_counts, specials = special_tokens)

In [None]:
# Build the source-side vocabulary from the training verses only.
sa_vocab = build_vocab(source_train,sa_tokenizer)
# Lookups of tokens that are not in the vocabulary fall back to the '<unk>' index.
sa_vocab.set_default_index(sa_vocab['<unk>'])
# Display the index assigned to the '<eos>' special token.
sa_vocab['<eos>']

Counter({'।': 2850, '-': 569, 'च': 146, 'न': 110, '१८': 72, '२': 63, '११': 62, '६': 57, '१': 55, '४': 52, 'मे': 46, '१०': 46, '३': 46, '९': 41, '१४': 40, 'स': 40, '७': 39, '१३': 39, '१६': 37, '१७': 37, 'मां': 36, '८': 35, 'ते': 33, '५': 32, 'हि': 32, '१२': 30, '१५': 30, 'कर्म': 29, 'उवाच': 28, 'पार्थ': 23, 'श्रीभगवानुवाच': 22, 'कौन्तेय': 20, 'अर्जुन': 19, 'तु': 19, 'भारत': 17, '१९': 16, '\nन': 16, 'चैव': 16, '२०': 15, 'एव': 15, 'ये': 15, '२२': 15, '२१': 14, 'वा': 14, 'यो': 14, '२५': 13, '२७': 13, '२३': 12, 'ब्रह्म': 12, 'परं': 11, 'महाबाहो': 11, 'यः': 11, 'योगी': 11, '२६': 11, 'त्वां': 11, '२४': 11, 'मम': 11, 'यथा': 11, 'यदा': 10, 'विद्धि': 10, 'परमं': 10, '\nस': 10, '२९': 10, '२८': 10, 'भूतानि': 10, '३०': 9, 'मयि': 9, '३४': 9, 'शृणु': 9, 'तथा': 9, '३६': 8, 'पुनः': 8, 'कर्माणि': 8, 'सञ्जय': 8, 'इति': 8, '४१': 8, '३३': 8, 'किं': 8, 'ज्ञानं': 8, 'मनः': 7, 'उच्यते': 7, 'सर्वशः': 7, 'परन्तप': 7, 'गतिम्': 7, '३५': 7, 'त्वं': 7, 'मया': 7, 'यत्': 7, '३७': 7, '४२': 7, 'सत्त्वं': 7, 'भवति': 7, 

3

In [None]:
# Build the target-side (English) vocabulary from the training translations only.
en_vocab = build_vocab(target_train, en_tokenizer)
# Lookups of tokens that are not in the vocabulary fall back to the '<unk>' index.
en_vocab.set_default_index(en_vocab['<unk>'])
# Display the index of the single-space token.
en_vocab[' ']

Counter({',': 1565, 'the': 1195, '.': 637, 'of': 602, 'and': 517, 'is': 356, 'to': 354, 'in': 290, ';': 258, ')': 253, '(': 252, 'O': 190, 'I': 182, 'by': 171, 'who': 163, 'Me': 150, 'Arjuna': 141, 'with': 126, '-': 122, 'all': 119, 'be': 111, 'that': 105, ' ': 104, 'not': 104, 'as': 103, 'am': 102, 'this': 101, 'a': 98, 'are': 97, 'for': 96, 'he': 92, 'from': 85, 'which': 82, 'The': 75, 'thou': 72, 'action': 71, 'also': 69, 'said': 67, '\n': 63, 'or': 62, 'one': 60, 'Self': 59, 'knowledge': 59, 'mind': 58, 'nor': 58, 'Yoga': 52, 'among': 52, 'Lord': 51, 'on': 50, 'beings': 45, 'these': 44, 'world': 43, 'having': 43, 'He': 43, 'it': 42, 'do': 40, 'My': 39, 'his': 38, 'they': 37, 'self': 37, 'their': 37, 'actions': 36, 'has': 36, 'know': 35, 'born': 35, 'senses': 34, 'without': 34, 'those': 33, 'men': 33, 'does': 33, 'man': 32, 'what': 32, 'Thee': 31, 'Brahman': 31, 'have': 31, 'sacrifice': 31, 'body': 30, 'Thy': 29, 'Thou': 29, '?': 29, 'even': 29, 'Krishna': 29, 'That': 28, 'been': 28

7

In [None]:
# Spot-check: index assigned to the token 'Arjuna' in the English vocabulary.
print(en_vocab['Arjuna'])

5


### Preprocessing:

In [None]:
import torch

def data_process(filepath_source,filepath_target):
    """Turn parallel source/target texts into pairs of token-index tensors.

    Each side is tokenized and indexed with its own tokenizer and vocabulary.
    Previously the two were crossed (source text went through the English
    tokenizer/vocab and vice versa), which mapped nearly every token to '<unk>'.

    Args:
        filepath_source: iterable of source strings (despite the name, not a
            path); processed with ``sa_tokenizer`` and ``sa_vocab``.
        filepath_target: iterable of target strings, aligned item by item with
            ``filepath_source``; processed with ``en_tokenizer`` and ``en_vocab``.

    Returns:
        list of ``(target_tensor, source_tensor)`` tuples of 1-D ``torch.long``
        tensors. The tuple order (target-derived first, source-derived second)
        is the same as before the fix, so existing consumers that unpack the
        pairs keep working.
    """
    data = []
    for (raw_sa, raw_en) in zip(filepath_source, filepath_target):
        sa_tensor_ = torch.tensor([sa_vocab[token] for token in sa_tokenizer(raw_sa.rstrip("\n"))],dtype = torch.long)
        en_tensor_ = torch.tensor([en_vocab[token] for token in en_tokenizer(raw_en.rstrip("\n"))],dtype = torch.long)
        # Target-derived tensor first, source-derived tensor second.
        data.append((en_tensor_, sa_tensor_))
    return data

In [None]:
# Convert both splits to index tensors using the vocabularies built from the training split.
train_data = data_process(source_train,target_train)
test_data = data_process(source_test, target_test)

In [None]:
# Show only the first few tensor pairs; displaying the whole list floods the output.
train_data[:3]

[(tensor([ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
           0,  0, 52,  0,  0, 55,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
           0,  0,  0]),
  tensor([  0,   0,   0, 251,   0,   0,   0,   0,   0,   0,   0, 251,   0,   0,
            0,   0,  63,   0])),
 (tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0]),
  tensor([  0,   0,   0,   0,   0,   0, 251,   0,   0,   0,   0,   0,   0,  63,
            0])),
 (tensor([ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 16,  0, 52,  0, 55,  0,  0,
           0,  0,  0,  0,  0,  0,  0,  0, 52,  0, 55,  0,  0,  0,  0, 16,  0,  0,
           0,  0,  0,  0,  0,  0,  0]),
  tensor([  0,   0,   0,   0,   0,   0, 251,   0,   0,   0,   0,   0,   0,  63,
            0])),
 (tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0]),
  tensor([  0,   0,   0,   0,   0, 251,   0,   0,   0,   0,   0,  63,   0])),
 (tensor(

In [None]:
# Batch size used when the DataLoaders are created.
BATCH_SIZE = 128
# Special-token indices are read from the English vocabulary. generate_batch applies
# them to both sides, which relies on both vocabularies being built by build_vocab
# with the same specials list.
PAD_IDX = en_vocab['<pad>']
BOS_IDX = en_vocab['<bos>']
EOS_IDX = en_vocab['<eos>']
# Use the GPU when one is available, otherwise the CPU.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
def generate_batch(data_batch):
    """Collate a list of tensor pairs into two padded batches.

    Every 1-D tensor is wrapped as ``[BOS_IDX, ..., EOS_IDX]`` and the
    sequences of each side are padded with ``PAD_IDX`` by ``pad_sequence``.

    Args:
        data_batch: iterable of 2-tuples of 1-D index tensors.

    Returns:
        tuple: ``(batch of the second tuple elements, batch of the first tuple
        elements)``, in that order.
    """
    bos = torch.tensor([BOS_IDX])
    eos = torch.tensor([EOS_IDX])

    firsts, seconds = [], []
    for first_seq, second_seq in data_batch:
        seconds.append(torch.cat([bos, second_seq, eos], dim = 0))
        firsts.append(torch.cat([bos, first_seq, eos], dim = 0))

    padded_firsts = pad_sequence(firsts, padding_value = PAD_IDX)
    padded_seconds = pad_sequence(seconds, padding_value = PAD_IDX)
    return padded_seconds, padded_firsts

In [None]:
# Training batches are reshuffled every epoch.
train_iter = DataLoader(train_data, batch_size = BATCH_SIZE, shuffle = True, collate_fn = generate_batch)
# Evaluation gains nothing from shuffling. A fixed order keeps the batch composition,
# and therefore the batch-averaged test loss, comparable from epoch to epoch.
test_iter = DataLoader(test_data, batch_size = BATCH_SIZE, shuffle = False, collate_fn = generate_batch)


In [None]:
# Print the number of batches per epoch, then every collated batch with its index.
# NOTE(review): printing whole batches is verbose; printing tensor shapes would usually be enough.
print(len(train_iter))
for (idx, batch) in enumerate(train_iter):
    print(idx)
    print(batch)

5
0
(tensor([[2, 2, 2,  ..., 2, 2, 2],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [1, 1, 1,  ..., 1, 1, 1],
        [1, 1, 1,  ..., 1, 1, 1],
        [1, 1, 1,  ..., 1, 1, 1]]), tensor([[2, 2, 2,  ..., 2, 2, 2],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [1, 1, 1,  ..., 1, 1, 1],
        [1, 1, 1,  ..., 1, 1, 1],
        [1, 1, 1,  ..., 1, 1, 1]]))
1
(tensor([[2, 2, 2,  ..., 2, 2, 2],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [1, 1, 1,  ..., 3, 1, 1],
        [1, 1, 1,  ..., 1, 1, 1],
        [1, 1, 1,  ..., 1, 1, 1]]), tensor([[2, 2, 2,  ..., 2, 2, 2],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [1, 1, 1,  ..., 1, 1, 1],
        [1, 1, 1,  ..., 1, 1, 1],
        [1, 1, 1,  ..., 1, 1, 1]]))
2
(tensor([[2, 2, 2,  ..., 2, 2, 2],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...

### Transformer Building:

In [None]:
from torch import Tensor
import math

class Seq2SeqTransformer(nn.Module):
    """Encoder-decoder Transformer with token embeddings and a linear output head.

    The encoder/decoder layers are created with the library's default layout
    (no ``batch_first`` argument is passed), so inputs are sequence-first.
    """

    def __init__(self, num_encoder_layers:int, num_decoder_layers:int, emb_size:int, src_vocab_size:int, tgt_vocab_size:int, dim_feedforward:int = 512, dropout:float = 0.1, nhead:int = None):
        """Build the encoder, decoder, embeddings and output projection.

        Args:
            num_encoder_layers: number of stacked encoder layers.
            num_decoder_layers: number of stacked decoder layers.
            emb_size: embedding / model dimension.
            src_vocab_size: number of entries in the source vocabulary.
            tgt_vocab_size: number of entries in the target vocabulary.
            dim_feedforward: hidden size of the feed-forward sub-layers.
            dropout: dropout probability, now applied to the encoder/decoder
                layers as well as the positional encoding (previously only
                the positional encoding received it).
            nhead: number of attention heads. When omitted, the module-level
                ``NHEAD`` constant is used, which was the previous behaviour.
        """
        super(Seq2SeqTransformer, self).__init__()
        if nhead is None:
            # Backward compatible: fall back to the notebook-level constant.
            nhead = NHEAD
        encoder_layer = TransformerEncoderLayer(d_model = emb_size, nhead = nhead, dim_feedforward = dim_feedforward, dropout = dropout)
        self.transformer_encoder = TransformerEncoder(encoder_layer, num_layers = num_encoder_layers)
        decoder_layer = TransformerDecoderLayer(d_model = emb_size, nhead = nhead, dim_feedforward = dim_feedforward, dropout = dropout)
        self.transformer_decoder = TransformerDecoder(decoder_layer, num_layers = num_decoder_layers)

        # Projects decoder outputs to one logit per target-vocabulary entry.
        self.generator = nn.Linear(emb_size, tgt_vocab_size)
        self.src_tok_emb = TokenEmbedding(src_vocab_size, emb_size)
        self.tgt_tok_emb = TokenEmbedding(tgt_vocab_size, emb_size)
        # A single positional-encoding module is shared by source and target.
        self.positional_encoding = PositionalEncoding(emb_size, dropout = dropout)

    def forward(self, src: Tensor, tgt: Tensor, src_mask:Tensor, tgt_mask:Tensor, src_padding_mask:Tensor, tgt_padding_mask:Tensor, memory_key_padding_mask:Tensor):
        """Run the full encoder-decoder pass and return target-vocabulary logits.

        Args:
            src: source token indices.
            tgt: target token indices fed to the decoder.
            src_mask: attention mask for the encoder self-attention.
            tgt_mask: attention mask for the decoder self-attention.
            src_padding_mask: key-padding mask for the source batch.
            tgt_padding_mask: key-padding mask for the target batch.
            memory_key_padding_mask: key-padding mask applied to the encoder
                output inside the decoder.

        Returns:
            Tensor of logits whose last dimension is ``tgt_vocab_size``.
        """
        src_emb = self.positional_encoding(self.src_tok_emb(src))
        tgt_emb = self.positional_encoding(self.tgt_tok_emb(tgt))
        memory = self.transformer_encoder(src_emb, src_mask, src_padding_mask)
        # The fourth positional argument (memory_mask) is deliberately None.
        outs = self.transformer_decoder(tgt_emb, memory, tgt_mask, None, tgt_padding_mask, memory_key_padding_mask)
        return self.generator(outs)

    def encode(self, src:Tensor, src_mask:Tensor):
        """Embed ``src`` and return the encoder output (no padding mask is applied)."""
        return self.transformer_encoder(self.positional_encoding(self.src_tok_emb(src)), src_mask)

    def decode(self, tgt:Tensor, memory:Tensor, tgt_mask:Tensor):
        """Embed ``tgt`` and return the decoder output for the given encoder ``memory``."""
        return self.transformer_decoder(self.positional_encoding(self.tgt_tok_emb(tgt)), memory, tgt_mask)

In [None]:
class PositionalEncoding(nn.Module):
    """Adds fixed sinusoidal position encodings to embeddings, then applies dropout."""

    def __init__(self, emb_size:int, dropout, maxlen:int = 5000):
        """Precompute the encoding table and store it as a buffer.

        Args:
            emb_size: embedding dimension (width of the encoding table).
            dropout: dropout probability applied after the addition.
            maxlen: number of positions precomputed.
        """
        super().__init__()
        inv_freq = torch.exp(-torch.arange(0,emb_size, 2) * math.log(10000)/emb_size)
        positions = torch.arange(0,maxlen).reshape(maxlen,1)

        # Even columns carry the sine terms, odd columns the cosine terms.
        table = torch.zeros((maxlen, emb_size))
        table[:,0::2] = torch.sin(positions*inv_freq)
        table[:,1::2] = torch.cos(positions*inv_freq)

        self.dropout = nn.Dropout(dropout)
        # Stored as (maxlen, 1, emb_size) so it broadcasts over the middle dimension.
        self.register_buffer('pos_embedding', table.unsqueeze(-2))

    def forward(self, token_embedding:Tensor):
        """Add the first ``token_embedding.size(0)`` encodings and apply dropout."""
        seq_len = token_embedding.size(0)
        encoded = token_embedding + self.pos_embedding[:seq_len,:]
        return self.dropout(encoded)


class TokenEmbedding(nn.Module):
    """Embedding lookup whose output is scaled by ``sqrt(emb_size)``."""

    def __init__(self,vocab_size:int, emb_size):
        """Create an embedding table with ``vocab_size`` rows of width ``emb_size``."""
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, emb_size)
        self.emb_size = emb_size

    def forward(self, tokens:Tensor):
        """Look up ``tokens`` (cast to long) and multiply by ``sqrt(emb_size)``."""
        scale = math.sqrt(self.emb_size)
        embedded = self.embedding(tokens.long())
        return embedded * scale

In [None]:
def generate_square_subsequent_mask(sz):
    """Return an ``sz`` x ``sz`` float mask on ``DEVICE`` for causal attention.

    Entries on and below the diagonal are 0.0; entries above the diagonal
    are ``-inf``.
    """
    blocked = torch.ones((sz,sz), device = DEVICE) * float('-inf')
    # Keep only the strictly-upper triangle of -inf values; the rest becomes 0.0.
    return torch.triu(blocked, diagonal = 1)

def create_mask(src,tgt):
    """Build the attention and padding masks for one source/target batch.

    Args:
        src: source index tensor; dimension 0 is used as the sequence length.
        tgt: target index tensor; dimension 0 is used as the sequence length.

    Returns:
        tuple ``(src_mask, tgt_mask, src_padding_mask, tgt_padding_mask)``:
        an all-False boolean square mask for the source, the causal mask from
        ``generate_square_subsequent_mask`` for the target, and the
        ``== PAD_IDX`` masks of both inputs with dimensions 0 and 1 swapped.
    """
    src_len, tgt_len = src.shape[0], tgt.shape[0]

    causal_tgt_mask = generate_square_subsequent_mask(tgt_len)
    open_src_mask = torch.zeros((src_len, src_len), device = DEVICE).type(torch.bool)

    src_is_pad = (src == PAD_IDX).transpose(0,1)
    tgt_is_pad = (tgt == PAD_IDX).transpose(0,1)
    return open_src_mask, causal_tgt_mask, src_is_pad, tgt_is_pad

In [None]:
# Model and training hyperparameters.
SRC_VOCAB_SIZE = len(sa_vocab)
TGT_VOCAB_SIZE = len(en_vocab)
EMB_SIZE = 512
NHEAD = 8  # read as a module-level global inside Seq2SeqTransformer.__init__
FFN_HID_DIM = 32  # passed as dim_feedforward when the model is constructed
# NOTE(review): train_iter/test_iter were already created with the earlier
# BATCH_SIZE of 128, so this reassignment does not affect them — confirm intent.
BATCH_SIZE = 1
NUM_ENCODER_LAYERS = 6
NUM_DECODER_LAYERS = 6
NUM_EPOCHS = 500
# Rebinds DEVICE, this time naming the first CUDA device explicitly.
DEVICE = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [None]:
# Instantiate the model; FFN_HID_DIM is passed positionally as dim_feedforward.
transformer = Seq2SeqTransformer(NUM_ENCODER_LAYERS, NUM_DECODER_LAYERS, EMB_SIZE, SRC_VOCAB_SIZE, TGT_VOCAB_SIZE, FFN_HID_DIM)

# Xavier-uniform initialisation for every parameter with more than one dimension
# (weight matrices and embedding tables); 1-D parameters keep their defaults.
for p in transformer.parameters():
    if p.dim()>1:
        nn.init.xavier_uniform_(p)

transformer = transformer.to(DEVICE)

# Positions whose target is PAD_IDX do not contribute to the loss.
loss_fn = torch.nn.CrossEntropyLoss(ignore_index = PAD_IDX)

optimizer = torch.optim.Adam(transformer.parameters(), lr = 0.0001, betas = (0.9,0.98), eps = 1e-9)



### Training the Model:

In [None]:
def train_epoch(model, train_iter, optimizer):
    """Train ``model`` for one pass over ``train_iter`` and return the mean batch loss.

    Args:
        model: the network to train; called with the source batch, the shifted
            target batch and the masks from ``create_mask``.
        train_iter: iterable of ``(src, tgt)`` batches that also supports ``len``.
        optimizer: optimizer stepped once per batch.

    Returns:
        float: sum of the per-batch losses divided by ``len(train_iter)``.

    Side effects:
        Saves the whole model object to 'BhagwadGita_Translator.pth' after the pass.
    """
    model.train()
    running_loss = 0
    for src, tgt in train_iter:
        src, tgt = src.to(DEVICE), tgt.to(DEVICE)

        # Teacher forcing: the decoder sees everything except the last position
        # and is scored against everything except the first.
        decoder_input = tgt[:-1,:]
        expected = tgt[1:,:]

        masks = create_mask(src, decoder_input)
        src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = masks

        logits = model(
            src, decoder_input,
            src_mask, tgt_mask,
            src_padding_mask, tgt_padding_mask,
            src_padding_mask,
        )

        optimizer.zero_grad()

        vocab_dim = logits.shape[-1]
        loss = loss_fn(logits.reshape(-1, vocab_dim), expected.reshape(-1))
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    torch.save(model,'BhagwadGita_Translator.pth')
    return running_loss/len(train_iter)

In [None]:
def evaluate(model,test_iter):
    """Compute the mean batch loss of ``model`` over ``test_iter`` without training.

    Args:
        model: the network to evaluate; switched to eval mode here.
        test_iter: iterable of ``(src, tgt)`` batches that also supports ``len``.

    Returns:
        float: sum of the per-batch losses divided by ``len(test_iter)``.

    Raises:
        ZeroDivisionError: if ``test_iter`` contains no batches.
    """
    model.eval()
    losses = 0
    # Evaluation needs no gradients: disabling autograd avoids building and
    # holding the computation graph for every batch.
    with torch.no_grad():
        for src, tgt in test_iter:
            src = src.to(DEVICE)
            tgt = tgt.to(DEVICE)

            # Decoder input drops the last position; the expected output drops the first.
            tgt_input = tgt[:-1,:]

            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_input)

            logits = model(src, tgt_input, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
            tgt_out = tgt[1:,:]
            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
            losses += loss.item()
    return losses/ len(test_iter)

In [None]:
import time

# Main loop: one training pass and one evaluation pass per epoch.
for epoch in range(1, NUM_EPOCHS+1):
    # Only the training pass is timed; evaluation runs after end_time is taken.
    start_time = time.time()
    train_loss = train_epoch(transformer, train_iter, optimizer)
    end_time = time.time()
    test_loss = evaluate(transformer, test_iter)
    print((f"Epoch : {epoch}, Train loss: {train_loss:.3f}, Test Loss: {test_loss:.3f},"
           f"Epoch Time= {(end_time - start_time):.3f}s"))



Epoch : 1, Train loss: 3.253, Test Loss: 0.593,Epoch Time= 3.102s
Epoch : 2, Train loss: 0.716, Test Loss: 0.607,Epoch Time= 1.553s
Epoch : 3, Train loss: 0.634, Test Loss: 0.585,Epoch Time= 1.574s
Epoch : 4, Train loss: 0.573, Test Loss: 0.420,Epoch Time= 1.873s
Epoch : 5, Train loss: 0.518, Test Loss: 0.455,Epoch Time= 2.540s
Epoch : 6, Train loss: 0.490, Test Loss: 0.363,Epoch Time= 2.369s
Epoch : 7, Train loss: 0.446, Test Loss: 0.359,Epoch Time= 2.277s
Epoch : 8, Train loss: 0.430, Test Loss: 0.356,Epoch Time= 2.432s
Epoch : 9, Train loss: 0.416, Test Loss: 0.387,Epoch Time= 2.124s
Epoch : 10, Train loss: 0.397, Test Loss: 0.373,Epoch Time= 2.544s
Epoch : 11, Train loss: 0.375, Test Loss: 0.320,Epoch Time= 2.825s
Epoch : 12, Train loss: 0.377, Test Loss: 0.295,Epoch Time= 1.907s
Epoch : 13, Train loss: 0.361, Test Loss: 0.348,Epoch Time= 1.640s
Epoch : 14, Train loss: 0.362, Test Loss: 0.320,Epoch Time= 1.592s
Epoch : 15, Train loss: 0.350, Test Loss: 0.331,Epoch Time= 1.618s
Epoc

### Putting it together:

In [None]:
def greedy_decode(model, src, src_mask, max_len, start_symbol):
    """Generate a target sequence one token at a time, always taking the arg-max.

    Args:
        model: object exposing ``encode``, ``decode`` and ``generator``.
        src: source index tensor for a single sentence.
        src_mask: attention mask passed to ``model.encode``.
        max_len: upper bound on the length of the returned sequence,
            including the start symbol.
        start_symbol: index placed at the first position of the output.

    Returns:
        Long tensor of generated indices with one column, beginning with
        ``start_symbol``; generation stops early once ``EOS_IDX`` is produced
        (the EOS index is included in the result).
    """
    src = src.to(DEVICE)
    src_mask = src_mask.to(DEVICE)
    # Pure inference: no gradients are needed for encoding or decoding.
    with torch.no_grad():
        # The encoder output does not change between steps, so compute and move it once.
        memory = model.encode(src, src_mask).to(DEVICE)
        ys = torch.ones(1,1).fill_(start_symbol).type(torch.long).to(DEVICE)
        for _ in range(max_len -1):
            # Boolean causal mask: True marks positions that may not be attended to.
            tgt_mask = (generate_square_subsequent_mask(ys.size(0)).type(torch.bool)).to(DEVICE)
            out = model.decode(ys, memory, tgt_mask)
            out = out.transpose(0,1)
            # Logits for the most recent position only.
            logits = model.generator(out[:,-1])
            _, next_word = torch.max(logits, dim = 1)
            next_word = next_word.item()

            ys = torch.cat([ys, torch.ones(1,1).type_as(src.data).fill_(next_word)],dim = 0)
            if next_word == EOS_IDX:
                break
    return ys

In [None]:
# Scratch check: look up the English-vocabulary index of the token 'a'.
tok = 'a'
something = en_vocab[tok]
something

450

In [None]:
def translate(model, src, src_vocab, tgt_vocab, src_tokenizer):
    """Translate one source string with greedy decoding.

    Args:
        model: trained model, switched to eval mode here.
        src: source text as a single string.
        src_vocab: mapping from source tokens to indices.
        tgt_vocab: target vocabulary providing ``get_itos()``.
        src_tokenizer: callable splitting ``src`` into tokens.

    Returns:
        str: the decoded target tokens joined by single spaces, with the
        literal "<bos>" and "<eos>" markers removed.
    """
    model.eval()
    itos = tgt_vocab.get_itos()

    token_ids = [BOS_IDX]
    token_ids.extend(src_vocab[tok] for tok in src_tokenizer(src))
    token_ids.append(EOS_IDX)
    num_tokens = len(token_ids)

    src_column = torch.LongTensor(token_ids).reshape(num_tokens,1)
    src_mask = torch.zeros(num_tokens, num_tokens).type(torch.bool)

    # Allow the output to be up to five tokens longer than the wrapped input.
    decoded = greedy_decode(model, src_column, src_mask, max_len = num_tokens + 5, start_symbol = BOS_IDX)
    words = [itos[tok] for tok in decoded.flatten()]
    sentence = " ".join(words)
    return sentence.replace("<bos>", "").replace("<eos>", "")

In [None]:
# Display the 'Shloka' text stored under index label 100, used as the demo input below.
dataset['Shloka'][100]

'अर्जुन उवाच |\nस्थितप्रज्ञस्य का भाषा समाधिस्थस्य केशव |\nस्थितधीः किं प्रभाषेत किमासीत व्रजेत किम् ||२-५४||'

In [None]:
# Translate a single verse with the trained model and print it next to the input.
text = "अर्जुन उवाच |\nस्थितप्रज्ञस्य का भाषा समाधिस्थस्य केशव |\nस्थितधीः किं प्रभाषेत किमासीत व्रजेत किम् ||२-५४||"
output = translate(transformer, text, sa_vocab, en_vocab, sa_tokenizer)
print(f'Shlok: {text} \nMeaning: {output}')

Shlok: अर्जुन उवाच |
स्थितप्रज्ञस्य का भाषा समाधिस्थस्य केशव |
स्थितधीः किं प्रभाषेत किमासीत व्रजेत किम् ||२-५४|| 
Meaning:  <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk>
