<a href="https://colab.research.google.com/github/Avishek2020/A-to-Z-Resources-for-Students/blob/master/tpf_seq2seq_with_batch_V4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

"""## Changes to be done

1. Plotting on each iteration
2. Early stop

"""

In [None]:
from __future__ import unicode_literals, print_function, division
from io import open
import unicodedata
import string
import re
import random
import pandas as pd
import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
from torch.autograd import Variable

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device
# Token ids used to start decoding (SOS) and to terminate a sequence (EOS).
# NOTE(review): Lang.index2word labels index 0 as "PAD" and index 1 as "SOS",
# which does not line up with these ids -- confirm the intended mapping.
SOS_token = 0
EOS_token = 1


class Lang:
    """Vocabulary for one language: word <-> index maps plus word counts.

    Indices 0-2 are reserved for the special entries pre-filled in
    ``index2word``; regular words are numbered from 3 upwards in order of
    first appearance. ``max_len`` tracks the length (in space-separated
    tokens) of the longest sentence passed to ``addSentence``.
    """

    def __init__(self, name):
        self.name = name
        self.word2index = {}
        self.word2count = {}
        # Reserved entries occupy indices 0..2, so the first real word is
        # stored at index 3 (== n_words) without overwriting any of them and
        # every index below n_words can be looked up.
        self.index2word = {0: "PAD", 1: "SOS", 2: "EOS"}
        self.n_words = 3  # Count PAD, SOS and EOS
        self.max_len = 0

    def addSentence(self, sentence):
        """Register every space-separated word of ``sentence``."""
        words = sentence.split(' ')
        if len(words) > self.max_len:
            self.max_len = len(words)
        for word in words:
            self.addWord(word)

    def addWord(self, word):
        """Assign the next free index to a new word, or bump its count."""
        if word not in self.word2index:
            self.word2index[word] = self.n_words
            self.word2count[word] = 1
            self.index2word[self.n_words] = word
            self.n_words += 1
        else:
            self.word2count[word] += 1


### To list the available GPUs, use `nvidia-smi -L`

In [None]:
# !nvidia-smi -L
!nvidia-smi

Sat Oct 31 21:34:05 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 456.71       Driver Version: 456.71       CUDA Version: 11.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name            TCC/WDDM | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  GeForce RTX 2060   WDDM  | 00000000:01:00.0 Off |                  N/A |
| N/A   62C    P0    22W /  N/A |    611MiB /  6144MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                                  |
|  GPU   GI   CI        PID   Type   Process name                  GPU Memory |
|       

In [None]:
def unicodeToAscii(s):
    """Return ``s`` NFD-decomposed with all combining marks (category 'Mn') removed."""
    decomposed = unicodedata.normalize('NFD', s)
    kept = [ch for ch in decomposed if unicodedata.category(ch) != 'Mn']
    return ''.join(kept)

# Lowercase, trim, and remove non-letter characters


def normalizeString(s):
    """Lowercase and trim ``s``, strip accents, and keep only letters and ``.!?``.

    Each of ``.``, ``!`` and ``?`` gets a space inserted in front of it, and
    every run of other non-letter characters collapses to a single space.
    """
    cleaned = unicodeToAscii(s.lower().strip())
    # Detach sentence punctuation from the preceding word.
    cleaned = re.sub(r"([.!?])", r" \1", cleaned)
    # Everything that is not a letter or sentence punctuation becomes a space.
    return re.sub(r"[^a-zA-Z.!?]+", r" ", cleaned)

In [None]:
def readLangs(lang1, lang2, reverse=False):
    """Read the tab-separated corpus and build empty vocabularies.

    Args:
        lang1: Name for the language in the first column of the corpus.
        lang2: Name for the language in the second column of the corpus.
        reverse: If True, swap the columns of every pair and swap which
            name is used for the input and the output vocabulary.

    Returns:
        ``(input_lang, output_lang, pairs)`` where the two ``Lang`` objects
        are still empty and ``pairs`` is a list of normalized sentence lists,
        one per corpus line.
    """
    print("Reading lines...")
    # Read the file and split into lines; the context manager guarantees the
    # file handle is closed even if reading fails.
    with open('./data/simplecomplexPWKP', encoding='utf-8') as corpus:
        lines = corpus.read().strip().split('\n')

    # Split every line into pairs and normalize
    pairs = [[normalizeString(s) for s in l.split('\t')] for l in lines]
    print(len(pairs))
    # Reverse pairs, make Lang instances
    if reverse:
        pairs = [list(reversed(p)) for p in pairs]
        input_lang = Lang(lang2)
        output_lang = Lang(lang1)
    else:
        input_lang = Lang(lang1)
        output_lang = Lang(lang2)

    return input_lang, output_lang, pairs

In [None]:
MAX_LENGTH = 42


def filterPair(p):
    """Return True when both sentences of pair ``p`` have fewer than MAX_LENGTH words."""
    if len(p[0].split(' ')) >= MAX_LENGTH:
        return False
    return len(p[1].split(' ')) < MAX_LENGTH

def filterPairs(pairs):
    """Keep only the pairs accepted by ``filterPair``, preserving their order."""
    return list(filter(filterPair, pairs))

# Prepare Data
def prepareData(lang1, lang2, reverse=False):
    """Load the corpus, drop over-long pairs and fill both vocabularies.

    Returns ``(input_lang, output_lang, pairs)`` with ``pairs`` already
    filtered and both ``Lang`` objects populated from the kept pairs.
    """
    src_vocab, tgt_vocab, all_pairs = readLangs(lang1, lang2, reverse)
    print("Read %s sentence pairs" % len(all_pairs))

    kept = filterPairs(all_pairs)
    print("Trimmed to %s sentence pairs" % len(kept))

    print("Counting words...")
    for entry in kept:
        src_vocab.addSentence(entry[0])
        tgt_vocab.addSentence(entry[1])

    print("Counted words:")
    for vocab in (src_vocab, tgt_vocab):
        print(vocab.name, vocab.n_words)
    return src_vocab, tgt_vocab, kept


# Build both vocabularies and the filtered pair list (first corpus column is
# named 'Simple', second 'Complex'), then print one random pair as a sanity check.
input_lang, output_lang, pairs = prepareData('Simple', 'Complex', False)
print(random.choice(pairs))

Reading lines...
108016
Read 108016 sentence pairs
Trimmed to 104476 sentence pairs
Counting words...
Counted words:
Simple 68016
Complex 81469
['from the th century to the sixteenth century the devotion was propagated but it did not seem to have developed in itself .', 'from the thirteenth to the sixteenth centuries the devotion was propagated but it did not seem to have developed in itself .']


In [None]:
len(pairs)

104476

In [None]:
# Encoder & Decoder

class EncoderRNN(nn.Module):
    """Embedding followed by a batch-first GRU, applied one time step per call."""

    def __init__(self, input_size, hidden_size , batch_size=64):
        super().__init__()
        self.hidden_size = hidden_size
        self.batch_size = batch_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size, batch_first=True)

    def forward(self, input, hidden):
        """Encode one time step; ``input.shape[0]`` is used as the batch size.

        Returns the GRU's ``(output, hidden)`` pair.
        """
        n_rows = input.shape[0]
        # Reshape to (batch, 1, features) so the GRU sees a length-1 sequence.
        step = self.embedding(input).view(n_rows, 1, -1)
        return self.gru(step, hidden)

    def initHidden(self, batch_size):
        """Return an all-zero hidden state of shape (1, batch_size, hidden_size)."""
        shape = (1, batch_size, self.hidden_size)
        return torch.zeros(*shape, device=device)
    
# Simple decoder without attention (additional alternative) ------
class DecoderRNN(nn.Module):
    """Plain (attention-free) GRU decoder that emits log-probabilities per step.

    ``forward`` handles any batch size, matching the batched ``EncoderRNN``
    and ``AttnDecoderRNN``; for a single-element batch the result is the same
    as the earlier single-sample implementation.
    """

    def __init__(self, hidden_size, output_size, batch_size =64):
        super(DecoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.batch_size = batch_size

        self.embedding = nn.Embedding(output_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size, batch_first =True)
        self.out = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hidden):
        """Decode one time step.

        Args:
            input: Token indices for the current step, one per batch element
                (e.g. shape (batch, 1) or (batch,)).
            hidden: GRU hidden state of shape (1, batch, hidden_size).

        Returns:
            ``(log_probs, hidden)`` with ``log_probs`` of shape
            (batch, output_size).
        """
        # (-1, 1, hidden_size) keeps one row per batch element instead of
        # flattening the whole batch into a single length-1 sequence.
        output = self.embedding(input).view(-1, 1, self.hidden_size)
        output = F.relu(output)
        output, hidden = self.gru(output, hidden)
        # Take the single time step of every batch element.
        output = self.softmax(self.out(output[:, 0, :]))
        return output, hidden

    def initHidden(self, batch_size):
        """Return an all-zero hidden state of shape (1, batch_size, hidden_size)."""
        return torch.zeros(1, batch_size, self.hidden_size, device=device)




class AttnDecoderRNN(nn.Module):
    """GRU decoder with attention over a fixed number of encoder positions.

    Attention weights are produced by a linear layer whose output size is
    ``max_length``, so ``encoder_outputs`` passed to ``forward`` must have
    exactly ``max_length`` positions along dimension 1.
    """

    def __init__(self, hidden_size, output_size, dropout_p=0.1, max_length=MAX_LENGTH , batch_size =64):
        super(AttnDecoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.dropout_p = dropout_p
        self.max_length = max_length
        # Honour the constructor argument instead of a hard-coded 64.
        self.batch_size = batch_size
        self.embedding = nn.Embedding(self.output_size, self.hidden_size)
        self.attn = nn.Linear(self.hidden_size * 2, self.max_length)
        self.attn_combine = nn.Linear(self.hidden_size * 2, self.hidden_size)
        self.dropout = nn.Dropout(self.dropout_p)
        self.gru = nn.GRU(self.hidden_size, self.hidden_size , batch_first=True)
        self.out = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, input_tensor, hidden, encoder_outputs):
        """Decode one time step.

        Args:
            input_tensor: Token indices for the current step;
                ``input_tensor.shape[0]`` is used as the batch size.
            hidden: GRU hidden state, indexed as ``hidden[0, :, :]``.
            encoder_outputs: Encoder states batch-multiplied with the
                attention weights, i.e. (batch, max_length, hidden_size).

        Returns:
            ``(log_probs, hidden, attn_weights)``.
        """
        embedded = self.embedding(input_tensor).view(input_tensor.shape[0], 1, -1)
        embedded = self.dropout(embedded)
        # Score every encoder position from the current embedding and hidden state.
        attn_weights = F.softmax(
            self.attn(torch.cat((embedded[: , 0 , :], hidden[0 , : ,:]), 1)), dim=1)

        # Weighted sum of encoder states: (batch, 1, max_length) x (batch, max_length, hidden).
        attn_applied = torch.bmm(attn_weights.unsqueeze(1),
                                 encoder_outputs)

        output = torch.cat((embedded[: ,0,: ], attn_applied[: , 0 , : ]), 1)
        output = self.attn_combine(output).unsqueeze(1)
        output = F.relu(output)
        output, hidden = self.gru(output, hidden)
        output = F.log_softmax(self.out(output[:,0,:]), dim=-1)
        return output, hidden, attn_weights

    def initHidden(self, batch_size):
        """Return an all-zero hidden state of shape (1, batch_size, hidden_size)."""
        return torch.zeros(1, batch_size ,  self.hidden_size, device=device)




In [None]:
#
def indexesFromSentence(lang, sentence):
    """Map ``sentence`` to word indices, right-padded with 0 up to ``lang.max_len``."""
    padded = [0] * lang.max_len
    for position, token in enumerate(sentence.split(' ')):
        padded[position] = lang.word2index[token]
    return padded


def tensorFromSentence(lang, sentence):
    """Return the padded indices of ``sentence`` plus EOS as a (length, 1) long tensor."""
    token_ids = indexesFromSentence(lang, sentence) + [EOS_token]
    column = torch.tensor(token_ids, dtype=torch.long, device=device)
    return column.view(-1, 1)


def tensorsFromPair(sentence, type="input"):
    """Convert ``sentence`` with the input vocabulary when ``type`` is "input", else the output one."""
    vocab = input_lang if type == "input" else output_lang
    return tensorFromSentence(vocab, sentence)

In [None]:
import time
import math
import matplotlib.pyplot as plt
#plt.switch_backend('agg')
import matplotlib.ticker as ticker
import numpy as np

def asMinutes(s):
    """Format a duration in seconds as '<minutes>m <seconds>s'."""
    whole_minutes = math.floor(s / 60)
    leftover = s - whole_minutes * 60
    return '%dm %ds' % (whole_minutes, leftover)


def timeSince(since, percent):
    """Return 'elapsed (- remaining)' given a start time and the fraction completed."""
    elapsed = time.time() - since
    # Linear extrapolation: total estimate minus what has already passed.
    remaining = elapsed / (percent) - elapsed
    return '%s (- %s)' % (asMinutes(elapsed), asMinutes(remaining))


### Reference link
https://github.com/pengyuchen/PyTorch-Batch-Seq2seq/blob/master/seq2seq_translation_tutorial.py

In [None]:
teacher_forcing_ratio = 0.5

def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, max_length=MAX_LENGTH):
    """Run one optimisation step on a single batch.

    Args:
        input_tensor: Source indices, indexed as ``[:, step, :]``
            (dimension 0 is the batch, dimension 1 the sequence).
        target_tensor: Target indices with the same layout.
        encoder: Module called as ``encoder(step_input, hidden)`` that also
            provides ``initHidden(batch_size)`` and ``hidden_size``.
        decoder: Module called as ``decoder(input, hidden, encoder_outputs)``
            returning ``(output, hidden, attention)``.
        encoder_optimizer: Optimizer stepping the encoder parameters.
        decoder_optimizer: Optimizer stepping the decoder parameters.
        criterion: Loss applied to each decoder output and target step.
        max_length: Number of encoder positions kept for attention; the
            input sequence must not be longer than this.

    Returns:
        The summed loss over all target steps divided by the target length.
    """
    batch_size = input_tensor.size(0)

    encoder_hidden = encoder.initHidden(batch_size)
    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    input_length = input_tensor.size(1)
    target_length = target_tensor.size(1)

    # Positions beyond the input length stay zero.
    encoder_outputs = torch.zeros(batch_size, max_length, encoder.hidden_size, device=device)

    loss = 0

    for ei in range(input_length):
        encoder_output, encoder_hidden = encoder(
            input_tensor[:, ei, :], encoder_hidden)
        encoder_outputs[:, ei, :] = encoder_output[:, 0, :]

    decoder_input = torch.tensor([[SOS_token] * batch_size], device=device)
    decoder_input = decoder_input.view(batch_size, -1)
    decoder_hidden = encoder_hidden

    # One coin flip per batch decides between teacher forcing and free running.
    use_teacher_forcing = random.random() < teacher_forcing_ratio

    for di in range(target_length):
        decoder_output, decoder_hidden, _attention = decoder(
            decoder_input, decoder_hidden, encoder_outputs)
        step_target = target_tensor[:, di, :]
        loss += criterion(decoder_output, step_target.view(batch_size))

        if use_teacher_forcing:
            # Teacher forcing: feed the target as the next input.
            decoder_input = step_target
        else:
            # Free running: feed the model's own prediction. The (batch, 1)
            # shape from topk is kept on purpose -- squeezing it would drop
            # the batch dimension when the batch holds a single sample.
            _top_values, top_indices = decoder_output.topk(1)
            decoder_input = top_indices.detach()  # detach from history as input

    loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.item() / target_length


# def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, max_length=MAX_LENGTH):
    
#     batch_size = input_tensor.size(0)

#     encoder_hidden = encoder.initHidden(batch_size)
#     encoder_optimizer.zero_grad()
#     decoder_optimizer.zero_grad()

#     input_length = input_tensor.size(1)
#     target_length = target_tensor.size(1)

#     encoder_outputs = torch.zeros(batch_size ,max_length , encoder.hidden_size, device=device)

#     loss = 0

#     for ei in range(input_length):
#         encoder_output, encoder_hidden = encoder(input_tensor[:, ei ,: ] , encoder_hidden)
#         encoder_outputs[: , ei , : ] = encoder_output[: , 0 , :]

#     decoder_input = torch.tensor([[SOS_token]*batch_size], device=device)
#     decoder_input = decoder_input.view(batch_size , -1)
#     decoder_hidden = encoder_hidden

#     use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False

#     if use_teacher_forcing:
#             # Teacher forcing: Feed the target as the next input
#             for di in range(target_length):
#                 decoder_output, decoder_hidden, decoder_attention = decoder(
#                     decoder_input , decoder_hidden, encoder_outputs)
#                 loss += criterion(decoder_output, target_tensor[:,di,:].view(batch_size))
#                 decoder_input = target_tensor[:,di,:]  # Teacher forcing

#     else:
#             # Without teacher forcing: use its own predictions as the next input
#             for di in range(target_length):
#                 decoder_output, decoder_hidden, decoder_attention = decoder(
#                     decoder_input, decoder_hidden, encoder_outputs)
#                 topv, topi = decoder_output.topk(1)
#                 decoder_input = topi.squeeze().detach()  # detach from history as input
#                 loss += criterion(decoder_output, target_tensor[:,di,:].view(batch_size))
#                 #if decoder_input.item() == EOS_token:
#                 #    break

#         loss.backward()

#         encoder_optimizer.step()
#         decoder_optimizer.step()
        
#         avg_Loss = loss.item() / target_length
#         train_losses.append(avg_Loss)

#     return train_losses #loss.item() / target_length

In [None]:
def validate(input_tensor, target_tensor, encoder, decoder, criterion, max_length=MAX_LENGTH):
    """Compute the mean per-step loss of one batch without updating the model.

    Both modules are switched to evaluation mode (so dropout is disabled)
    and gradients are not tracked; the modules' previous training mode is
    restored before returning.

    Args:
        input_tensor: Source indices, indexed as ``[:, step, :]``.
        target_tensor: Target indices with the same layout.
        encoder: Module called as ``encoder(step_input, hidden)`` that also
            provides ``initHidden(batch_size)`` and ``hidden_size``.
        decoder: Module called as ``decoder(input, hidden, encoder_outputs)``.
        criterion: Loss applied to each decoder output and target step.
        max_length: Number of encoder positions kept for attention.

    Returns:
        The summed loss over all target steps divided by the target length.
    """
    batch_size = input_tensor.size(0)
    input_length = input_tensor.size(1)
    target_length = target_tensor.size(1)

    encoder_was_training = encoder.training
    decoder_was_training = decoder.training
    encoder.eval()
    decoder.eval()
    try:
        # No backward pass happens here, so skip building the autograd graph.
        with torch.no_grad():
            encoder_hidden = encoder.initHidden(batch_size)
            encoder_outputs = torch.zeros(batch_size, max_length, encoder.hidden_size, device=device)

            loss = 0

            for ei in range(input_length):
                encoder_output, encoder_hidden = encoder(
                    input_tensor[:, ei, :], encoder_hidden)
                encoder_outputs[:, ei, :] = encoder_output[:, 0, :]

            decoder_input = torch.tensor([[SOS_token] * batch_size], device=device)
            decoder_input = decoder_input.view(batch_size, -1)
            decoder_hidden = encoder_hidden

            # NOTE(review): the random choice between teacher forcing and
            # free running is kept from the training loop; it makes the
            # validation loss vary between calls -- confirm this is intended.
            use_teacher_forcing = random.random() < teacher_forcing_ratio

            for di in range(target_length):
                decoder_output, decoder_hidden, _attention = decoder(
                    decoder_input, decoder_hidden, encoder_outputs)
                step_target = target_tensor[:, di, :]
                loss += criterion(decoder_output, step_target.view(batch_size))

                if use_teacher_forcing:
                    # Teacher forcing: feed the target as the next input.
                    decoder_input = step_target
                else:
                    # Keep the (batch, 1) shape so a single-sample batch
                    # does not lose its batch dimension.
                    _top_values, top_indices = decoder_output.topk(1)
                    decoder_input = top_indices.detach()
    finally:
        encoder.train(encoder_was_training)
        decoder.train(decoder_was_training)

    return loss.item() / target_length


#### Early stopping Logic
 1) https://github.com/Bjarten/early-stopping-pytorch/blob/master/MNIST_Early_Stopping_example.ipynb
 
 2) https://charon.me/posts/pytorch/pytorch_seq2seq_1/

In [None]:
# Data split 80 / 10 / 10 (train / test / validation); early-stopping patience
# !pip install pytorchtools
# import EarlyStopping
# from pytorchtools import EarlyStopping
# https://raw.githubusercontent.com/Bjarten/early-stopping-pytorch/master/pytorchtools.py
# patience: Number of epochs with no improvement after which training will be stopped.
class EarlyStopping:
    """Early stops the training if validation loss doesn't improve after a given patience."""
    def __init__(self, patience=7, verbose=False, delta=0, path='checkpoint.pt', trace_func=print):
        """
        Args:
            patience (int): How long to wait after last time validation loss improved.
                            Default: 7
            verbose (bool): If True, prints a message for each validation loss improvement.
                            Default: False
            delta (float): Minimum change in the monitored quantity to qualify as an improvement.
                            Default: 0
            path (str): Stored on the instance; the checkpoints themselves are
                            written to the fixed files under './save_model/'.
                            Default: 'checkpoint.pt'
            trace_func (function): trace print function.
                            Default: print
        """
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        # float('inf') instead of np.Inf: the latter alias no longer exists
        # in NumPy 2.x.
        self.val_loss_min = float('inf')
        self.delta = delta
        self.path = path
        self.trace_func = trace_func

    def __call__(self, val_loss, model_encoder , model_decoder):
        """Record ``val_loss``; checkpoint on improvement, otherwise count towards patience."""
        # Higher score is better, so negate the loss.
        score = -val_loss

        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model_encoder , model_decoder)
        elif score < self.best_score + self.delta:
            self.counter += 1
            self.trace_func(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model_encoder , model_decoder)
            self.counter = 0

    def save_checkpoint(self, val_loss, model_encoder , model_decoder):
        '''Saves both models' state dicts when validation loss decreases.'''
        import os

        if self.verbose:
            self.trace_func(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
        # Create the target directory on first use so a missing folder does
        # not abort training at the first checkpoint.
        os.makedirs('./save_model', exist_ok=True)
        torch.save(model_encoder.state_dict(),  './save_model/encoder.dict')
        torch.save(model_decoder.state_dict(),  './save_model/decoder.dict')
        self.val_loss_min = val_loss

In [None]:
# Plotting 

def showPlot(points, iters):
    """Plot ``points`` as a line and save it to './figures/fig_1_<iters>.png'.

    All open matplotlib figures are closed afterwards.
    """
    # A single figure/axes pair; no separate empty figure is created first.
    fig, ax = plt.subplots()
    # this locator puts ticks at regular intervals
    loc = ticker.MultipleLocator(base=0.2)
    ax.yaxis.set_major_locator(loc)
    ax.plot(points)
    dest = './figures/fig_1_'+str(iters)+'.png'
    fig.savefig(dest)
    plt.close('all')


def showIterWiseLossPlot(plot_train_iter_losses, plot_valid_iter_losses):
    """Plot per-iteration training and validation losses, show and save the figure.

    A dashed vertical line marks the position of the lowest validation loss.
    Reads the module-level ``n_iters`` for the x-axis label and writes
    './figures/iterwiseLoss_plot.png'.
    """
    figure = plt.figure(figsize=(10,8))

    n_train = len(plot_train_iter_losses)
    n_valid = len(plot_valid_iter_losses)
    plt.plot(range(1, n_train + 1), plot_train_iter_losses, label='Training Loss')
    plt.plot(range(1, n_valid + 1), plot_valid_iter_losses, label='Validation Loss')

    # 1-based position of the lowest validation loss
    best_position = 1 + plot_valid_iter_losses.index(min(plot_valid_iter_losses))
    plt.axvline(best_position, linestyle='--', color='r', label='Early Stopping Checkpoint')

    plt.xlabel(f"No of iter {n_iters}")
    plt.ylabel('loss')
    plt.xlim(0, n_train + 1)  # consistent scale
    plt.grid(True)
    plt.legend()
    plt.tight_layout()
    plt.show()
    figure.savefig('./figures/iterwiseLoss_plot.png', bbox_inches='tight')
    
    
def showLossEarlyPlot(avg_train_losses, avg_valid_losses):
    """Plot the recorded training and validation losses, show and save the figure.

    A dashed vertical line marks the position of the lowest validation loss.
    Reads the module-level ``train_loader`` and ``n_iters`` for the x-axis
    label and writes './figures/loss_Early_plot.png'.
    """
    figure = plt.figure(figsize=(10,8))

    n_train = len(avg_train_losses)
    n_valid = len(avg_valid_losses)
    plt.plot(range(1, n_train + 1), avg_train_losses, label='Training Loss')
    plt.plot(range(1, n_valid + 1), avg_valid_losses, label='Validation Loss')

    # 1-based position of the lowest validation loss
    best_position = 1 + avg_valid_losses.index(min(avg_valid_losses))
    plt.axvline(best_position, linestyle='--', color='r', label='Early Stopping Checkpoint')

    plt.xlabel(f"iters - No of batch {len(train_loader)} x iter {n_iters}")
    plt.ylabel('loss')
    plt.xlim(0, n_train + 1)  # consistent scale
    plt.grid(True)
    plt.legend()
    plt.tight_layout()
    plt.show()
    figure.savefig('./figures/loss_Early_plot.png', bbox_inches='tight')



#
def _sentencesToBatch(sentences, kind):
    """Convert an iterable of sentences into one stacked index tensor."""
    return torch.stack([tensorsFromPair(sentence, kind) for sentence in sentences], dim=0)


def trainIters(encoder, decoder, n_iters, train_loader, test_loader, valid_loader, print_every=10, plot_every=10, learning_rate=0.001):
    """Train for up to ``n_iters`` passes over ``train_loader`` with early stopping.

    Every pass trains on all batches of ``train_loader``, then evaluates all
    batches of ``valid_loader``. The mean validation loss of the pass is
    handed to ``EarlyStopping``, which checkpoints the models on improvement
    and stops the loop once the patience is exhausted. Loss curves are
    plotted when the loop ends.

    Args:
        encoder: Encoder module to train.
        decoder: Decoder module to train.
        n_iters: Maximum number of full passes over ``train_loader``.
        train_loader: Yields ``(input_sentences, target_sentences)`` batches.
        test_loader: Accepted for interface compatibility; not used here.
        valid_loader: Yields validation batches in the same format.
        print_every: Print a progress line every this many passes.
        plot_every: Accepted for interface compatibility; not used here.
        learning_rate: Learning rate of both SGD optimizers.

    Note:
        The early-stopping patience is read from the module-level
        ``patience`` variable.
    """
    start = time.time()
    print_loss_total = 0  # Sum of per-pass mean losses, reset every print_every

    # per-batch losses across the whole run
    avg_train_losses = []
    avg_valid_losses = []
    # per-pass mean losses
    plot_train_iter_losses = []
    plot_valid_iter_losses = []

    # initialize the early_stopping object
    early_stopping = EarlyStopping(patience=patience, verbose=True)

    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)

    criterion = nn.NLLLoss()

    for epoch in range(1, n_iters + 1):

        ###################
        # train the model #
        ###################
        epoch_train_total = 0
        for input_sentences, target_sentences in train_loader:
            input_tensor = _sentencesToBatch(input_sentences, "input")
            target_tensor = _sentencesToBatch(target_sentences, "target")

            train_loss = train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion)

            epoch_train_total += train_loss
            avg_train_losses.append(train_loss)

        epoch_train_avg = epoch_train_total / len(train_loader)
        plot_train_iter_losses.append(epoch_train_avg)
        print_loss_total += epoch_train_avg

        ######################
        # validate the model #
        ######################
        epoch_valid_total = 0
        for input_sentences, target_sentences in valid_loader:
            input_tensor = _sentencesToBatch(input_sentences, "input")
            target_tensor = _sentencesToBatch(target_sentences, "target")

            valid_loss = validate(input_tensor, target_tensor, encoder, decoder, criterion)

            avg_valid_losses.append(valid_loss)
            epoch_valid_total += valid_loss

        epoch_valid_avg = epoch_valid_total / len(valid_loader)
        plot_valid_iter_losses.append(epoch_valid_avg)

        # early_stopping needs the validation loss to check if it has
        # decreased, and if it has, it will make a checkpoint of the current
        # model. The mean over the whole validation set is used rather than
        # the loss of whichever batch happened to come last.
        early_stopping(epoch_valid_avg, encoder, decoder)

        if early_stopping.early_stop:
            print("Early stopping")
            break

        if epoch % print_every == 0:
            # Mean per-batch training loss over the last print_every passes.
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print('%s (%d %d%%) %.4f' % (timeSince(start, epoch / n_iters), epoch, epoch / n_iters * 100, print_loss_avg))

        print(f" iter {epoch}")

    showIterWiseLossPlot(plot_train_iter_losses, plot_valid_iter_losses)
    # Visualizing the Loss and the Early Stopping Checkpoint
    print(f" avg_train_losses {avg_train_losses} Int avg_valid_losses {(avg_valid_losses)}")
    showLossEarlyPlot(avg_train_losses, avg_valid_losses)

In [None]:
def train_test_val(X, train_ratio, test_ratio, val_ratio):
    """Split ``X`` by position into (train, test, validation) slices.

    The first ``train_ratio`` share goes to train, the next ``test_ratio``
    share to test and whatever remains to validation; ``val_ratio`` itself
    is not used in the computation.
    """
    total = len(X)
    train_end = int(round(total * train_ratio))
    test_end = int(round(total * (train_ratio + test_ratio)))
    return X[:train_end], X[train_end:test_end], X[test_end:]

### Load and Batch the Data
#### Total pairs length = 104476
#### Batch Size = 64
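#### 80 % Training, 10 % Test, 10 % Validation (the ratios passed to `train_test_val` in `create_datasets`)
##### -size of Training Pair : 83581    size of Test pair : 10447    size of Validation pair : 10448

###### -Total No of Training Batch : 83581 / 64 = 1305.95 (1305 full batches, because `drop_last=True`)    Total No of Test Batch : 163.23 (163)    Total No of Validation Batch : 163.25 (163)

###### -n_iters = 10 , then 10 * 1305 = 13050 training batches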

In [None]:
from torch.utils.data import Dataset

def create_datasets(batch_size):
    """Split the module-level ``pairs`` 80/10/10 and wrap each part in a DataLoader.

    All loaders keep the original order (no shuffling), pin memory and drop
    the last incomplete batch.

    Returns:
        ``(train_loader, test_loader, valid_loader)``.
    """
    # put ratio as you wish
    train_pairs, test_pairs, val_pairs = train_test_val(pairs, 0.8, 0.1, 0.1)

    print(f" Size of Train Pairs {len(train_pairs)} \
             Size of Validate Pairs {len(val_pairs)} \
             Size of Test Pairs {len(test_pairs)}")

    def make_loader(subset):
        # Identical settings for all three splits.
        return torch.utils.data.DataLoader(subset,
                                           batch_size=batch_size,
                                           shuffle=False,
                                           pin_memory=True,
                                           drop_last=True)

    train_loader = make_loader(train_pairs)
    valid_loader = make_loader(val_pairs)
    test_loader = make_loader(test_pairs)

    return train_loader, test_loader, valid_loader
 

In [None]:
75000 X 1350

In [None]:
patience = 13

In [None]:
# Define hyperparameters
#patience = 20
batch_size = 64
hidden_size = 256
# NOTE(review): trainIters runs one full pass over train_loader per iteration,
# so this requests up to 75000 passes (early stopping may end the run sooner)
# -- confirm that this is the intended budget.
n_iters  = 75000 #len(pairs) # Total number of training pairs / batch size (len(features_train) / batch_size)

# Build the train / test / validation loaders from the prepared pairs.
train_loader, test_loader, valid_loader = create_datasets(batch_size)

# Encoder and attention decoder sized to the two vocabularies.
encoder1 = EncoderRNN(input_lang.n_words, hidden_size).to(device)
attn_decoder1 = AttnDecoderRNN(hidden_size, output_lang.n_words, dropout_p=0.1).to(device)

# Start training; progress is printed every 5 iterations.
trainIters(encoder1, attn_decoder1, n_iters, train_loader, test_loader, valid_loader, print_every=5, plot_every=5, learning_rate=0.001)



 Size of Train Pairs 83581              Size of Validate Pairs 10448              Size of Test Pairs 10447
Validation loss decreased (inf --> 4.004864).  Saving model ...
 iter 1
EarlyStopping counter: 1 out of 20
 iter 2
Validation loss decreased (4.004864 --> 3.760181).  Saving model ...
 iter 3
EarlyStopping counter: 1 out of 20
 iter 4
EarlyStopping counter: 2 out of 20
40m 15s (- 603787m 58s) (5 0%) 5510.8446
 iter 5
Validation loss decreased (3.760181 --> 3.611762).  Saving model ...
 iter 6
Validation loss decreased (3.611762 --> 3.597796).  Saving model ...
 iter 7
Validation loss decreased (3.597796 --> 3.567402).  Saving model ...
 iter 8
EarlyStopping counter: 1 out of 20
 iter 9
EarlyStopping counter: 2 out of 20
80m 32s (- 603997m 12s) (10 0%) 5115.7970
 iter 10
Validation loss decreased (3.567402 --> 3.515219).  Saving model ...
 iter 11
EarlyStopping counter: 1 out of 20
 iter 12
EarlyStopping counter: 2 out of 20
 iter 13
EarlyStopping counter: 3 out of 20
 iter 14
Earl

In [None]:
#Time 00:32  pm 27-10-2020

In [None]:
def evaluate(encoder, decoder, sentence, max_length=MAX_LENGTH):
    """Greedily decode ``sentence`` and return the words plus attention rows.

    Both modules are put into evaluation mode for the duration of the call
    (so dropout does not randomise the output) and their previous training
    mode is restored afterwards.

    Args:
        encoder: Encoder module providing ``initHidden`` and ``hidden_size``.
        decoder: Attention decoder called as
            ``decoder(input, hidden, encoder_outputs)``.
        sentence: Normalized input sentence; every word must be present in
            ``input_lang``.
        max_length: Number of encoder positions and maximum number of
            decoding steps.

    Returns:
        ``(decoded_words, attentions)`` where ``decoded_words`` ends with
        '<EOS>' if the end token was produced, and ``attentions`` holds one
        row per decoding step.
    """
    encoder_was_training = encoder.training
    decoder_was_training = decoder.training
    encoder.eval()
    decoder.eval()
    try:
        with torch.no_grad():
            input_tensor = tensorFromSentence(input_lang, sentence)
            input_length = input_tensor.size()[0]
            encoder_hidden = encoder.initHidden(batch_size=1)

            encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)

            for ei in range(input_length):
                encoder_output, encoder_hidden = encoder(input_tensor[ei],
                                                         encoder_hidden)
                encoder_outputs[ei,:] += encoder_output[:, 0 , :].squeeze()

            decoder_input = torch.tensor([[SOS_token]], device=device)  # SOS
            decoder_input = decoder_input.view(1,1)
            decoder_hidden = encoder_hidden
            # Add the batch dimension the decoder's bmm expects.
            encoder_outputs = encoder_outputs.view(1 , encoder_outputs.shape[0] , encoder_outputs.shape[1])
            decoded_words = []
            decoder_attentions = torch.zeros(max_length, max_length)

            for di in range(max_length):
                decoder_output, decoder_hidden, decoder_attention = decoder(
                    decoder_input, decoder_hidden, encoder_outputs)
                decoder_attentions[di] = decoder_attention.data
                topv, topi = decoder_output.data.topk(1)
                if topi.item() == EOS_token:
                    decoded_words.append('<EOS>')
                    break
                else:
                    decoded_words.append(output_lang.index2word[topi.item()])

                # Feed the prediction back in as a (1, 1) batch.
                decoder_input = topi.squeeze().detach()
                decoder_input = decoder_input.view(1,-1)

            return decoded_words, decoder_attentions[:di + 1]
    finally:
        encoder.train(encoder_was_training)
        decoder.train(decoder_was_training)

def evaluateRandomly(encoder, decoder, n=10):
    """Print ``n`` random pairs: source ('>'), reference ('=') and model output ('<')."""
    for _ in range(n):
        sample = random.choice(pairs)
        print('>', sample[0])
        print('=', sample[1])
        predicted_words, _attentions = evaluate(encoder, decoder, sample[0])
        print('<', ' '.join(predicted_words))
        print('')

In [None]:
evaluateRandomly(encoder1, attn_decoder1)


In [None]:
# #Saving should be as simple as: 
# torch.save(encoder1.state_dict(), '/content/encoder.dict')
# torch.save(attn_decoder1.state_dict(), '/content/decoder.dict')

In [None]:
# Decode one hand-picked sentence with the trained models and print the result.
s1 = "it is found in the region pays de la loire in the sarthe department in the west of france ."
output_words, attentions = evaluate(encoder1, attn_decoder1, s1)
output_sentence = ' '.join(output_words)
print('<', output_sentence)
print('')

In [None]:
# Decode a second hand-picked sentence with the trained models and print the result.
s2 = "the ahom kings reigned for close to years ."
output_words, attentions = evaluate(encoder1, attn_decoder1, s2)
output_sentence = ' '.join(output_words)
print('<', output_sentence)
print('')

In [None]:
"""
# writing to file
MyFileSimple=open('./TestData/simple_test','w')
for ss, cs in test_loader:    
    MyFileSimple.write(str(ss))
    MyFileSimple.write('\n')
MyFileSimple.close()
# complex
MyFileComplex=open('./TestData/complex_test','w')
for ss,cs in test_loader:    
    MyFileComplex.write(str(cs))
    MyFileComplex.write('\n')
MyFileComplex.close()


"""
