In [1]:
from utils import *
import numpy as np
import pickle
# import torch

In [2]:
index_to_english_alphabet=pickle.load(open('vocab_tools/index_to_english_alphabet.pickle', 'rb'))
index_to_hindi_alphabet=pickle.load(open('vocab_tools/index_to_hindi_alphabet.pickle', 'rb'))

In [3]:
hindi_alphabet_to_index=pickle.load(open('vocab_tools/hindi_alphabet_to_index.pickle', 'rb')) 
english_alphabet_to_index=pickle.load(open('vocab_tools/english_alphabet_to_index.pickle', 'rb')) 

In [4]:
X_train=np.load('simple_data/X_train.npy')
X_valid=np.load('simple_data/X_val.npy')

y_train=np.load('simple_data/y_train.npy')
y_valid=np.load('simple_data/y_val.npy')

In [5]:
from __future__ import print_function, division
import os
import torch
import pandas as pd
from skimage import io, transform
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils

# Ignore warnings
import warnings
warnings.filterwarnings("ignore")

plt.ion()   # interactive mode

  from .autonotebook import tqdm as notebook_tqdm


In [6]:
class Eng_Hind_Dataset(Dataset):
    """Face Landmarks dataset."""

    def __init__(self, in_file, out_file, root_dir='simple_data',device='cuda'):
        """
        Args:
            csv_file (string): Path to the csv file with annotations.
            root_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        self.input = torch.tensor(np.load(root_dir+'/'+in_file))
        self.output = torch.tensor(np.load(root_dir+'/'+out_file))
        
        assert(len(self.input)==len(self.output),"Error: I/O Lengths must be same")
        

    def __len__(self):
        return len(self.input)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()
            
        X=self.input[idx]
        X=X.to(device)
        y=self.output[idx]
        y=y.to(device)
        


        sample = {'input': X, 'output': y}

        return sample

In [7]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [8]:
device

device(type='cuda')

In [9]:
training_data=Eng_Hind_Dataset("X_train.npy","y_train.npy",device=device)
val_data=Eng_Hind_Dataset("X_val.npy","y_val.npy",device=device)
test_data=Eng_Hind_Dataset("X_test.npy","y_test.npy",device=device)

In [10]:
train_dataloader = DataLoader(training_data, batch_size=16,shuffle=True)

In [11]:
val_dataloader = DataLoader(val_data, batch_size=16,shuffle=True)

In [12]:
import torch.nn as nn

In [13]:
def cell_type(mode:str='rnn'):
    mode=mode.lower()
    if mode == 'rnn':
        return nn.RNN
    elif mode =='gru':
        return nn.GRU
    else:
        return nn.LSTM
        

In [19]:
class Encoder(nn.Module):


    def __init__(self, input_size, embed_size, enc_hid_size, dec_hid_size, num_layers, cell_mode, dropout, is_bi):
        super().__init__()
        
        # create embedding layer
        self.embedding = nn.Embedding(input_size, embed_size,padding_idx=english_alphabet_to_index['.'])

        #creating LSTM/GRU/RNN cell
        cell=cell_type(cell_mode)
        
        self.cell=cell(embed_size,enc_hid_size,num_layers,dropout=dropout,bidirectional=is_bi,batch_first=True)
        self.cell_mode=cell_mode
        self.is_bi=is_bi
        if is_bi:
            self.fc=nn.Linear(enc_hid_size*2, dec_hid_size)
        else:
            self.fc=nn.Linear(enc_hid_size, dec_hid_size)
        
        

    def forward(self, input_batch: torch.LongTensor):
        """

        Parameters
        ----------
        src_batch : 2d torch.LongTensor
            Batched tokenized source sentence of shape [sent len, batch size].

        Returns
        -------
        hidden, cell : 3d torch.LongTensor
            Hidden and cell state of the LSTM layer. Each state's shape
            [n layers * n directions, batch size, hidden dim]
        """
        embedded = self.embedding(input_batch) # [sent len, batch size, emb dim]
        print('encoder embd',embedded.shape)
        
        if self.cell_mode.lower()=='lstm':
            outputs, (hidden, cell) = self.cell(embedded)
            if self.is_bi:
                concated = torch.cat((hidden[ :,-2, :], hidden[ :,-1, :]), dim=1)
                
            else:
                concated = hidden[ :,-1, :]
                print(concated.shape)
            
            hidden = torch.tanh(self.fc(concated))
            return outputs,hidden
            
            # return hidden, cell  
        
        else:
            outputs, hidden = self.cell(embedded)
            print('init_hidden', hidden.shape)
            
            print('hid',hidden[ :,-2, :].shape)
            print('hiddd',hidden.shape)
            
            if self.is_bi:
#                 concated = torch.cat((hidden[ :,-2, :], hidden[ :,-1, :]), dim=1)
                concated = torch.cat((hidden[ -2, :,:], hidden[ -1, :,:]), dim=1)
                print('oooo',concated.shape)
                
            else:
                concated = hidden[ -1,:, :]
                print(concated.shape)
                
            hidden = torch.tanh(self.fc(concated))
            
            return outputs,hidden

        


In [14]:
ss1=next(iter(train_dataloader))

In [15]:
hindi_alphabet_to_index['.']

2

In [310]:
class Decoder_s(nn.Module):
    def __init__(self, output_size, embed_dim, hidden_size, num_layers, dropout,cell_type,attention):
        super().__init__()
        self.hidden_size = hidden_size
        self.cell_type=cell_type
        self.embedding = nn.Embedding(output_size, embed_dim,padding_idx=2)
        if cell_type=="LSTM":
          self.rnn = nn.LSTM((hidden_size*2)+embed_dim, hidden_size, num_layers,  dropout=dropout)
        elif cell_type=="GRU":
          self.rnn=nn.GRU((hidden_size*2)+embed_dim,hidden_size,num_layers,dropout=dropout,bidirectional=False)
        else:
          self.rnn=nn.RNN(hidden_size*2+embed_dim,hidden_size,num_layers,dropout=dropout)

        self.fc = nn.Linear(hidden_size, output_size)
        self.dropout=nn.Dropout(dropout)
        self.attention=attention
        self.rnn

    def forward(self, x,encoder_output,hidden,cell):
        attention = self.attention(encoder_output, hidden).unsqueeze(1)
#         output = encoder_output.permute(1, 0, 2)
        output = encoder_output
        print(attention.shape)
        print(output.shape)
        
        context = torch.bmm(attention, output).permute(1, 0, 2)
        print('att',attention.shape)
        print('out',output.shape)
        print('ctx',context.shape)
        print('hid',hidden.shape)
        x = x.unsqueeze(0)
        embedded = self.dropout(self.embedding(x))

        rnn_input = torch.cat((embedded, context), dim=2)
        if self.cell_type=="LSTM":
          output,(hidden,cell) = self.rnn(rnn_input,(hidden.unsqueeze(0),cell.unsqueeze(0)))
          output= self.fc(output.squeeze(0))
          return output, hidden.squeeze(0),cell.squeeze(0)
        
        elif self.cell_type=="GRU":
          output,hidden = self.rnn(rnn_input,hidden.unsqueeze(0))
          output = output.squeeze(0)
          output= self.fc(output)
          return output, hidden.squeeze(0)
        
        else:
          output,hidden = self.rnn(rnn_input,hidden.unsqueeze(0))
          output = output.squeeze(0)
          output= self.fc(output)
          return output, hidden.squeeze(0)

In [16]:
class Decoder(nn.Module):

    def __init__(self, output_size, embed_size, enc_hid_dim, dec_hid_dim, num_layers, cell_mode,\
                 dropout, attention, is_bi):
        super().__init__()
    
        self.dropout = dropout
        self.attention = attention

        self.embedding = nn.Embedding(output_size, embed_size,padding_idx=hindi_alphabet_to_index['.'])
        cell=cell_type(cell_mode)
        
        
        if is_bi:
            self.cell=cell((enc_hid_dim * 2) + embed_size, dec_hid_dim,num_layers,\
                           dropout=dropout,bidirectional=False,batch_first=False)
        

        else:
            self.cell=cell(enc_hid_dim + embed_size, dec_hid_dim,num_layers,\
                           dropout=dropout,bidirectional=False,batch_first=False)
            
        
        
        self.out = nn.Linear(dec_hid_dim, output_size)

    def forward(self, trg, encoder_outputs, hidden):
 
        attention = self.attention(encoder_outputs, hidden).unsqueeze(1)
        print('att',attention.shape)
        print('outs', encoder_outputs.shape)
        
        context = torch.bmm(attention, encoder_outputs).permute(1, 0, 2)

        # input sentence -> embedding
        # [1, batch size, emb dim]
        embedded = self.embedding(trg.unsqueeze(0))
        print('emdb;,',embedded.shape)
        print('contxt',context.shape)
        cell_input = torch.cat((embedded, context), dim=2)

        outputs, hidden = self.cell(cell_input, hidden.unsqueeze(0))
        prediction = self.out(outputs.squeeze(0))
        return prediction, hidden.squeeze(0)

In [20]:


# first experiment with n_layers = 1
# input_size, embed_size, hid_size, dechid num_layers, cell_mode, dropout, is_bi
encoder = Encoder(30, 128, 128, 128, 1, 'gru', 0.2, True).to(device)
outputs, hidden = encoder(ss1['input'])
outputs.shape, hidden.shape



encoder embd torch.Size([16, 30, 128])
init_hidden torch.Size([2, 16, 128])
hid torch.Size([2, 128])
hiddd torch.Size([2, 16, 128])
oooo torch.Size([16, 256])


(torch.Size([16, 30, 256]), torch.Size([16, 128]))

In [21]:
outputs[:, -1, :128].shape

torch.Size([16, 128])

In [22]:
# assert (outputs[:, -1,:128] == hidden[2]).all()
# assert (outputs[:,0, 128:] == hidden[3]).all()

In [27]:
# enc_hid_dim, dec_hid_dim, is_bi

attention = Attention(128, 128, True).to(device)
attention_weight = attention(outputs, hidden)
attention_weight.shape

attn:outs torch.Size([16, 30, 256])
attn:hid torch.Size([16, 30, 128])


torch.Size([16, 30])

In [28]:
ss1['output'][:,0]

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], device='cuda:0')

In [29]:
print(hidden.shape)

torch.Size([16, 128])


In [30]:
ss1['output'][:,0].shape

torch.Size([16])

In [31]:
decoder = Decoder(68, 128, 128, 128, 1,'gru', 0.2, attention, False).to(device)


In [32]:
decoder

Decoder(
  (attention): Attention(
    (fc1): Linear(in_features=384, out_features=128, bias=True)
    (fc2): Linear(in_features=128, out_features=1, bias=False)
  )
  (embedding): Embedding(68, 128, padding_idx=2)
  (cell): GRU(256, 128, dropout=0.2)
  (out): Linear(in_features=128, out_features=68, bias=True)
)

In [33]:
len(english_alphabet_to_index)

29

In [34]:
decoder_s=Decoder_s(68,128,128,1,0.1,"GRU",attention)
decoder_s.to(device)
# embed_dim, hidden_size, num_layers, dropout,cell_type,attention

NameError: name 'Decoder_s' is not defined

In [35]:
prediction, decoder_hidden = decoder_s(ss1['output'][:,0], outputs, hidden,None)
# output_size, embed_size, enc_hid_dim, dec_hid_dim, num_layers, cell_mode, dropout, attention, is_bi
# notice the decoder_hidden's shape should match the shape that's generated by
# the encoder
prediction.shape, decoder_hidden.shape

NameError: name 'decoder_s' is not defined

In [341]:
decoder = Decoder(68, 128, 128, 128, 1,'gru', 0.2, attention, False).to(device)
prediction, decoder_hidden = decoder(ss1['output'][:,0], outputs, hidden)
# output_size, embed_size, enc_hid_dim, dec_hid_dim, num_layers, cell_mode, dropout, attention, is_bi
# notice the decoder_hidden's shape should match the shape that's generated by
# the encoder
prediction.shape, decoder_hidden.shape

<class 'torch.nn.modules.rnn.GRU'>
attn:outs torch.Size([16, 30, 256])
attn:hid torch.Size([16, 30, 128])
att torch.Size([16, 1, 30])
outs torch.Size([16, 30, 256])
emdb;, torch.Size([1, 16, 128])
contxt torch.Size([1, 16, 256])


(torch.Size([16, 68]), torch.Size([16, 128]))

In [26]:
class Attention(nn.Module):

    def __init__(self, enc_hid_dim, dec_hid_dim, is_bi):
        super().__init__()
        self.enc_hid_dim = enc_hid_dim
        self.dec_hid_dim = dec_hid_dim

        # enc_hid_dim multiply by 2 due to bidirectional
        if is_bi:
            self.fc1 = nn.Linear(enc_hid_dim * 2 + dec_hid_dim, dec_hid_dim)

            
            
        self.fc2 = nn.Linear(dec_hid_dim, 1, bias=False)

    def forward(self, encoder_outputs, hidden):
        src_len = encoder_outputs.shape[1]
        batch_size = encoder_outputs.shape[0]
        
        # repeat encoder hidden state src_len times [batch size, sent len, dec hid dim]
        hidden = hidden.unsqueeze(1).repeat(1, src_len, 1)
        # reshape/permute the encoder output, so that the batch size comes first
        
        # [batch size, sent len, enc hid dim * 2], times 2 because of bidirectional
        # outputs = encoder_outputs.permute(1, 0, 2)
        outputs = encoder_outputs

        # the attention mechanism receives a concatenation of the hidden state
        # and the encoder output
        print('attn:outs',outputs.shape)
        print('attn:hid',hidden.shape)
        concat = torch.cat((hidden, outputs), dim=2)
        
        # fully connected layer and softmax layer to compute the attention weight
        # [batch size, sent len, dec hid dim]
        energy = torch.tanh(self.fc1(concat))

        # attention weight should be of [batch size, sent len]
        attention = self.fc2(energy).squeeze(dim=2)        
        attention_weight = torch.softmax(attention, dim=1)
        return attention_weight

In [None]:
class Seq2Seq(nn.Module):
    def __init__(self, encoder, decoder, device):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.device = device


    def forward(self, source_batch, target_batch, teacher_forcing_ratio=0.5):

        batch_size,max_len  = target_batch.shape
        #print(max_len,batch_size)
        target_vocab_size = self.decoder.output_size
        #print(target_vocab_size)

        # tensor to store decoder's output
        outputs = torch.zeros(max_len, batch_size, target_vocab_size).to(self.device)

        # last hidden & cell state of the encoder is used as the decoder's initial hidden state
        hidden, cell = self.encoder(source_batch)       

        trg = target_batch[:,0]
        for i in range(1, max_len):
            prediction, hidden, cell = self.decoder(trg, hidden, cell)
            outputs[i] = prediction

            if random.random() < teacher_forcing_ratio:
                trg = target_batch[:,i]
            else:
                trg = prediction.argmax(1)

        return outputs


        
        

In [None]:
hindi_alphabet_to_index['>']

In [None]:
def accuracy_calc(target_seq,seq2,mode='full',device=device):# predicted
    eos_index=(target_seq==hindi_alphabet_to_index['>']).nonzero()
    eos_idx=eos_index[:,1]
    
    correct=torch.Tensor([0]).to(device)
    correct_chars=torch.Tensor([0]).to(device)
    tot_chars=torch.Tensor([0]).to(device)
    for iterate,idx in enumerate(eos_idx):
        inputter=seq2[iterate][:idx]
        outputter=target_seq[iterate][:idx]
        if torch.all(torch.eq(inputter,outputter)):
            correct+=1
            correct_chars+=idx
            tot_chars+=idx
        else:
            correct_chars+=torch.sum(inputter == outputter).item()
            tot_chars+=idx
            
#         print(correct,correct_chars,tot_chars)
        
    return correct.item(),correct_chars.item(),tot_chars.item()
            
            
        
    

In [None]:
accuracy_calc(ss1['output'],ss1['output'])

In [None]:
def train(seq2seq, iterator, optimizer, criterion):
    
    
    seq2seq.train()
    
    epoch_loss = 0
    correct=0
    correct_char=0
    tot_char=0
    
    relax_acc=0
    
    
    for batch in iterator:
        optimizer.zero_grad()
        outputs = seq2seq(batch['input'], batch['output'])
        batch_label=batch['output'].transpose(0,1)
        batch_size=len(batch['output'])
        
        _, predicted = torch.max(outputs, dim=2)
        outputs_flatten = outputs.view(-1, outputs.shape[-1])
        trg_flatten = batch_label.reshape(-1)
        

        trg_flatten.requires_grad=False
        loss = criterion(outputs_flatten, trg_flatten)
        correct_temp,correct_chars_temp,tot_chars_temp=accuracy_calc(batch['output'],predicted.transpose(0,1))
        
        #___________
        
        correct+=correct_temp
        correct_char+=correct_chars_temp
        tot_char+=tot_chars_temp
        
        
        #_______________
        

        loss.backward()
        optimizer.step()
        

        epoch_loss += loss.item()
        


    return epoch_loss / len(iterator), correct/(len(iterator)*16),correct_char/tot_char

In [None]:
def evaluate(seq2seq, iterator, criterion):
    seq2seq.eval()

    epoch_loss = 0
    correct=0
    correct_char=0
    tot_char=0
    
    relax_acc=0
    
    with torch.no_grad():
        for batch in iterator:
            outputs = seq2seq(batch['input'], batch['output'],teacher_forcing_ratio=0)
            batch_label=batch['output'].transpose(0,1)
            batch_size=len(batch['output'])


            _, predicted = torch.max(outputs, dim=2)
            #print('wow_preds',predicted.shape)

            outputs_flatten = outputs.view(-1, outputs.shape[-1])
            trg_flatten = batch_label.reshape(-1)

            loss = criterion(outputs_flatten, trg_flatten)
            
            correct_temp,correct_chars_temp,tot_chars_temp=accuracy_calc(batch['output'],predicted.transpose(0,1))
        
            #___________

            correct+=correct_temp
            correct_char+=correct_chars_temp
            tot_char+=tot_chars_temp

            #_______________       
            
            epoch_loss += loss.item()
            

    return epoch_loss / len(iterator), correct/(len(iterator)*16),correct_char/tot_char



In [None]:
def epoch_time(start_time, end_time):
    e_time = end_time - start_time
    mins = e_time // 60
    secs = e_time%60
    return mins, secs,

In [None]:
def count_params(model):
    return sum(param.numel() for param in model.parameters() if param.requires_grad)

In [None]:
import time
import random

In [None]:
import torch.optim as optim
def make_model(train_iterator,valid_iterator,N_EPOCHS=20):
    E=Encoder(30,128,128,2,'lstm',0.27,False)
    E=E.to(device)
    D=Decoder(68,128,128,2,'lstm',0.27,False)
    D=D.to(device)
    S=Seq2Seq(E,D,device)
    S.to(device)    
    print(f'The model has {count_params(S):,} trainable parameters')
    
    optimizer = optim.Adam(S.parameters())
    criterion = nn.CrossEntropyLoss(ignore_index=hindi_alphabet_to_index['.'])
    criterion=criterion.to(device)
    
    best_valid_loss = float('inf')
#     return S

    for epoch in range(N_EPOCHS):    
        start_time = time.time()
        train_loss,train_acc,train_stuff = train(S, train_iterator, optimizer, criterion)
        valid_loss,valid_acc,val_stuff = evaluate(S, valid_iterator, criterion)
        end_time = time.time()

        epoch_mins, epoch_secs = epoch_time(start_time, end_time)

        if valid_loss < best_valid_loss:
            best_valid_loss = valid_loss
            torch.save(S.state_dict(), 'model1.pt')

        # it's easier to see a change in perplexity between epoch as it's an exponential
        # of the loss, hence the scale of the measure is much bigger
        print(f'Epoch: {epoch+1:02} | Time: {epoch_mins}m {epoch_secs:.2f}s')
        print(f'\t Train Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
        print(f'\t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc*100:.2f}%')
        print(f'\t Relaxed Train. Acc: {train_stuff*100:.2f}% | Relaxed Val. Acc: {val_stuff*100:.2f}%')
        
    return S

    

In [None]:
train_iterator=train_dataloader
valid_iterator=val_dataloader
SS=make_model(train_iterator,valid_iterator,N_EPOCHS=15)

In [None]:
a=next(iter(train_dataloader))

In [None]:
bya=SS(a['input'],a['output'])

In [None]:
a['output'].shape

In [None]:
bya.shape

In [None]:
import math

In [None]:
def word_from_torchies(torchie1,index_toalp):
    torchie=torchie1.cpu().numpy()
    return word_from_vecs(torchie,index_toalp,False)

In [None]:
def word_from_batch(batch):
    wordlet=[]
    for i in range(len(batch)):
        wordlet.append(word_from_torchies(batch[i],index_to_hindi_alphabet))
    return wordlet
        

In [None]:
word_from_torchies(bya.transpose(0,1).argmax(2)[0],index_to_hindi_alphabet)

In [None]:
index_to_hindi_alphabet[65]

In [None]:
def make_wrd(stuff):
    asa=[]
    for k in stuff.cpu().numpy():
        asa.append(index_to_hindi_alphabet[k])
    return "".join(asa)
    

In [None]:
make_wrd(bya.transpose(0,1).argmax(2)[10])

In [None]:
bya.transpose(0,1).argmax(2)[10]

In [None]:
for i in range(16):
    print('................')
    print(i)
    print(word_from_torchies(a['output'][i],index_to_hindi_alphabet),\
         '---',\
         make_wrd(bya.transpose(0,1).argmax(2)[i])\
         )


In [None]:
word (self, source_batch,target_batch):
    max_len, batch_size = target_batch.shape
    outputs = torch.zeros(max_len, batch_size, target_vocab_size).to(self.device)


            # last hidden & cell state of the encoder is used as the decoder's initial hidden state
    hidden, cell = self.encoder(source_batch)

    wordet=[]


    trg = torch.tensor(hindi_alphabet_to_index['<'])
    trg=trg.to(device)
    wordet.append(index_to_hindi_alphabet(trg.cpu().numpy()))
    for i in range(1, max_len):
        prediction, hidden, cell = self.decoder(trg, hidden, cell)
        outputs[i] = prediction
        trg = prediction.argmax(1)
        wordet.append(index_to_hindi_alphabet(trg.cpu().numpy()))


    return ''.join(wordet)

In [None]:
word_from_vecs(X_valid[0],index_to_english_alphabet,False)

In [None]:
word_from_vecs(y_valid[0],index_to_hindi_alphabet,False)

In [None]:
next(iter(train_dataloader))

In [None]:
            for j in range(predicted.shape[1]):
                predicted_seq = predicted[:, j]
                targets_seq = target_seq[:, j]

                # Find the index of the first EOS token in the sequence
                eos_idx = (targets_seq == hin_token_map["\n"]).nonzero()
                if eos_idx.numel() > 0:
                    eos_idx = eos_idx[0][0]
                    predicted_seq = predicted_seq[:eos_idx]
                    targets_seq = targets_seq[:eos_idx]

In [None]:
s