In [106]:
from __future__ import unicode_literals, print_function, division
from io import open
import unicodedata
import string
import re  
import random
import numpy as np
import os
import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
import collections
from itertools import dropwhile
import copy

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

PATH = "./"

In [107]:
#!pip3 install sacrebleu
from sacrebleu import corpus_bleu

In [108]:
#read in chinese-english pairs
#read in chinese-english pairs
lines_zh = open(PATH+'iwslt-zh-en/train.tok.zh',encoding = 'utf-8').read().strip().split('\n')
lines_en = open(PATH+'iwslt-zh-en/train.tok.en',encoding = 'utf-8').read().strip().split('\n')
lines_zh_test = open(PATH+'iwslt-zh-en/test.tok.zh',encoding = 'utf-8').read().strip().split('\n')
lines_en_test = open(PATH+'iwslt-zh-en/test.tok.en',encoding = 'utf-8').read().strip().split('\n')
lines_zh_val = open(PATH+'iwslt-zh-en/dev.tok.zh',encoding = 'utf-8').read().strip().split('\n')
lines_en_val = open(PATH+'iwslt-zh-en/dev.tok.en',encoding = 'utf-8').read().strip().split('\n')

In [109]:
def delect_least_common_words(list_sent, threshold = 5):
    ret_list =[]
    for x in list_sent:
        ret_list += x.split()
    ret_dic = collections.Counter(ret_list)
    
    #print (ret_dic["&amp;"])
    #print (ret_dic["&apos;"])
    #print (ret_dic["&quot;"])
    #print (ret_dic["&#91"])
    for key, count in dropwhile(lambda key_count: key_count[1] >= threshold, ret_dic.most_common()):
        
        del ret_dic[key]
        
        
    return list(ret_dic.keys())

In [110]:
zh_words = delect_least_common_words(lines_zh)
en_words = delect_least_common_words(lines_en)

In [111]:
words_to_load = 100000
PAD_IDX = 0
UNK_IDX = 1
SOS_IDX = 2
EOS_IDX = 3

with open('cc.zh.300.vec') as f:
    loaded_embeddings_ft = np.zeros((words_to_load+3, 300))
    words_ft = {}
    idx2words_ft = {}
    ordered_words_ft = []
    ordered_words_ft.extend(['<pad>', '<unk>', '<s>'])
    loaded_embeddings_ft[0,:] = np.zeros(300)
    loaded_embeddings_ft[1,:] = np.random.normal(size = 300)
    loaded_embeddings_ft[2,:] = np.random.normal(size = 300)
    for i, line in enumerate(f):
        if i >= words_to_load: 
            break
        s = line.split()
        loaded_embeddings_ft[i+3, :] = np.asarray(s[1:])
        words_ft[s[0]] = i+3
        idx2words_ft[i+3] = s[0]
        ordered_words_ft.append(s[0])
    length = len(np.setdiff1d(zh_words, ordered_words_ft))
    tmp_embeddings = np.zeros((length, 300))
    for idx, word in enumerate(np.setdiff1d(zh_words, ordered_words_ft)):
        words_ft[word] = idx+words_to_load+3
        idx2words_ft[idx+words_to_load+3] = word
        tmp_embeddings[idx, :] = np.random.normal(size = 300)
    loaded_embeddings_ft = np.concatenate((loaded_embeddings_ft, tmp_embeddings), axis = 0)
    words_ft['<pad>'] = PAD_IDX
    words_ft['<unk>'] = UNK_IDX
    words_ft['<s>'] = SOS_IDX
    idx2words_ft[PAD_IDX] = '<pad>'
    idx2words_ft[UNK_IDX] = '<unk>'
    idx2words_ft[SOS_IDX] = '<s>'
    ordered_words_ft = list(words_ft.keys())

In [112]:
#English embedding
with open('wiki-news-300d-1M.vec') as f:
    loaded_embeddings_ft_en = np.zeros((words_to_load+4, 300))
    words_ft_en = {}
    idx2words_ft_en = {}
    ordered_words_ft_en = []
    ordered_words_ft_en.extend(['<pad>', '<unk>', '<s>', '</s>'])
    loaded_embeddings_ft_en[0,:] = np.zeros(300)
    loaded_embeddings_ft_en[1,:] = np.random.normal(size = 300)
    loaded_embeddings_ft_en[2,:] = np.random.normal(size = 300)
    loaded_embeddings_ft_en[3,:] = np.random.normal(size = 300)
    for i, line in enumerate(f):
        if i >= words_to_load: 
            break
        s = line.split()
        loaded_embeddings_ft_en[i+4, :] = np.asarray(s[1:])
        words_ft_en[s[0]] = i+4
        idx2words_ft_en[i+4] = s[0]
        ordered_words_ft_en.append(s[0])
    length = len(np.setdiff1d(en_words, ordered_words_ft_en))
    tmp_embeddings = np.zeros((length, 300))
    for idx, word in enumerate(np.setdiff1d(en_words, ordered_words_ft_en)):
        words_ft_en[word] = idx+words_to_load+4
        idx2words_ft_en[idx+words_to_load+4] = word
        tmp_embeddings[idx, :] = np.random.normal(size = 300)
    loaded_embeddings_ft_en = np.concatenate((loaded_embeddings_ft_en, tmp_embeddings), axis = 0)
    words_ft_en['<pad>'] = PAD_IDX
    words_ft_en['<unk>'] = UNK_IDX
    words_ft_en['<s>'] = SOS_IDX
    words_ft_en['</s>'] = EOS_IDX
    idx2words_ft_en[PAD_IDX] = '<pad>'
    idx2words_ft_en[UNK_IDX] = '<unk>'
    idx2words_ft_en[SOS_IDX] = '<s>'
    idx2words_ft_en[EOS_IDX] = '</s>'
    ordered_words_ft_en = list(words_ft_en.keys())

In [113]:
#add sos and eos in each sentence
def add_sos_eos(lines):
    
    train = []
    for l in lines:
        l = '<s> ' + l + ' </s>'
        train.append(l)
    return train
zh_train = add_sos_eos(lines_zh)    
en_train = add_sos_eos(lines_en)
zh_test = add_sos_eos(lines_zh_test)
en_test = add_sos_eos(lines_en_test)
zh_val = add_sos_eos(lines_zh_val)
en_val = add_sos_eos(lines_en_val)

In [114]:
# convert token to id in the dataset
def token2index_dataset(tokens_data,eng = False):
    indices_data = []
    for tokens in tokens_data:
        index_list = []
        for token in tokens.split():
            if eng == False:
                try:
                    index_list.append(words_ft[token])
                except KeyError:
                    index_list.append(UNK_IDX)
            else:
                try:
                    index_list.append(words_ft_en[token])
                except KeyError:
                    index_list.append(UNK_IDX)
        indices_data.append(index_list)
    return indices_data

In [115]:
zh_train_indices = token2index_dataset(zh_train)
en_train_indices = token2index_dataset(en_train,eng = True)
zh_val_indices = token2index_dataset(zh_val)
en_val_indices = token2index_dataset(en_val,eng = True)
zh_test_indices = token2index_dataset(zh_test)
en_test_indices = token2index_dataset(en_test,eng = True)

In [116]:
#max_sentence_length
length_of_en = [len(x.split()) for x in en_train]
max_sentence_length_en = sorted(length_of_en)[-int(len(length_of_en)*0.01)]
length_of_zh = [len(x.split()) for x in zh_train]
max_sentence_length_zh = sorted(length_of_zh)[-int(len(length_of_zh)*0.01)]



In [117]:
max_sentence_length_zh

69

In [118]:
#Create Data Loader
import torch
from torch.utils.data import Dataset

class load_dataset(Dataset):
    """
    Class that represents a train/validation/test dataset that's readable for PyTorch
    Note that this class inherits torch.utils.data.Dataset
    """
    
    def __init__(self, data_list_s1,data_list_s2):
        """
        @param data_list_zh: list of Chinese tokens 
        @param data_list_en: list of English tokens as TARGETS
        """
        self.data_list_s1 = data_list_s1
        self.data_list_s2 = data_list_s2
        
        assert (len(self.data_list_s1) == len(self.data_list_s2))

    def __len__(self):
        return len(self.data_list_s1)
        
    def __getitem__(self, key):
        """
        Triggered when you call dataset[i]
        """
        
        token_idx_s1 = self.data_list_s1[key][:max_sentence_length_zh]
        token_idx_s2 = self.data_list_s2[key][:max_sentence_length_en]
        return [token_idx_s1, token_idx_s2, len(token_idx_s1), len(token_idx_s2)]

def collate_func(batch):
    """
    Customized function for DataLoader that dynamically pads the batch so that all 
    data have the same length
    """
    data_list_s1 = []
    data_list_s2 = []
    length_list_s1 = []
    length_list_s2 = []
    for datum in batch:
        length_list_s1.append(datum[2])
        length_list_s2.append(datum[3])
        padded_vec_zh = np.pad(np.array(datum[0]), 
                                pad_width=((0,max_sentence_length_zh-datum[2])), 
                                mode="constant", constant_values=0)
        padded_vec_en = np.pad(np.array(datum[1]), 
                                pad_width=((0,max_sentence_length_en-datum[3])), 
                                mode="constant", constant_values=0)
        data_list_s1.append(padded_vec_zh[:max_sentence_length_zh])
        data_list_s2.append(padded_vec_en[:max_sentence_length_en])
    #print(type(data_list_s1[0]))
    if torch.cuda.is_available and torch.has_cudnn:
        return [torch.from_numpy(np.array(data_list_s1)).cuda(), torch.from_numpy(np.array(data_list_s2)).cuda(),
                torch.LongTensor(length_list_s1).cuda(), torch.LongTensor(length_list_s2).cuda()]
    else:    
        return [torch.from_numpy(np.array(data_list_s1)), torch.from_numpy(np.array(data_list_s2)),
                torch.LongTensor(length_list_s1), torch.LongTensor(length_list_s2)]
    


In [122]:
BATCH_SIZE = 50
EMBEDDING_SIZE = 300 # fixed as from the input embedding data

train_dataset = load_dataset(zh_train_indices, en_train_indices)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=BATCH_SIZE,
                                           collate_fn=collate_func,
                                           shuffle=True)

val_dataset = load_dataset(zh_val_indices, en_val_indices)
val_loader = torch.utils.data.DataLoader(dataset=val_dataset,
                                           batch_size=BATCH_SIZE,
                                           collate_fn=collate_func,
                                           shuffle=False)


test_dataset = load_dataset(zh_test_indices, en_test_indices)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                           batch_size=BATCH_SIZE,
                                           collate_fn=collate_func,
                                           shuffle=False)

### With Attention

In [52]:
class EncoderRNN(nn.Module):
    def __init__(self, emb_dim, hidden_size, embed= torch.from_numpy(loaded_embeddings_ft).float(),num_layers=1):
        super(EncoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.emb_dim = emb_dim
        self.num_layers = num_layers 
        
        # freeze needs to set to be false as we need the random embeddings to train with the pretrained embeddings
        self.embedding = nn.Embedding.from_pretrained(embed, freeze=False)
        self.gru = nn.GRU(emb_dim, hidden_size,num_layers=num_layers,batch_first=True,bidirectional = True)

    def forward(self, data, hidden):
        
        batch_size, seq_len = data.size()
        
        embed = self.embedding(data)
        output, hidden = self.gru(embed,hidden)
#         hidden = torch.cat((hidden[0:1,:,:], hidden[1:2,:,:]), 2)
        hidden = torch.sum(hidden, dim = 0).unsqueeze(0)
        output = (output[:, :, :self.hidden_size] +
                output[:, :, self.hidden_size:])
        #hidden = [n layers * n directions =1 , batch_size, hidden_size ]
        return output, hidden

    # initialize the hidden with random numbers
    def initHidden(self,batch_size):
        return torch.randn(2*self.num_layers, batch_size, self.hidden_size,device=device)

In [53]:
class AttnDecoderRNN(nn.Module):
    def __init__(self,emb_dim,hidden_size, output_size, embed= torch.from_numpy(loaded_embeddings_ft_en).float(),num_layers=1,
                 dropout_p=0.1, max_length=max_sentence_length_zh):
        super(AttnDecoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers 
        self.output_size = output_size
        self.dropout_p = dropout_p
        self.max_length = max_length

        self.embedding = nn.Embedding.from_pretrained(embed, freeze=False)
        self.attn = nn.Linear(self.hidden_size, self.hidden_size)
        self.attn_combine = nn.Linear(self.hidden_size *2, self.hidden_size)
        self.dropout = nn.Dropout(self.dropout_p)

        self.gru = nn.GRU(emb_dim, self.hidden_size)
        self.out = nn.Linear(self.hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, data, hidden,encoder_outputs):
        
        ### embed: [1 * batch size * emb_dim = 300 ] ###
        ### hidden: [1 * batch size * hidden_size = 300 ] ###
        ### encoder_outputs: [batch size * max_sentence_length_zh * hidden_size = 300 ] ###
        ### 因为这里concat之后，attn layer 他给的是 hidden size *2 
        ### 所以我这儿的hidden size就只能写300了 
        
        embed = self.embedding(data)
        embed = self.dropout(embed)
 
        ### torch.cat((embed, hidden), 2)  
        ### [1 * batch size * (emb_dim + hidden_size) ]
        
        ### attn_weights: [1 * batch size * max_sentence_length_zh ]###
        ### attn_weights[0].unsqueeze(1): [batch size * 1 * max_sentence_length_zh ]###
        
        ### softmax dim=2 因为最后一个dimension是 词组什么的，不能是1，1的话就是
        ### 不同batch间这样比较了？
        #hidden = [1 * batch_size * emb_dim]
        gru_out, hidden = self.gru(embed, hidden)
#         attn_weights = F.softmax(
#             self.attn(torch.cat((embed[0], hidden[0]), 1)), dim=1).unsqueeze(1)
        attn_weights0 = self.attn(hidden).transpose(0,1)
        attn_prod = torch.bmm(attn_weights0, encoder_outputs.transpose(1,2))
        ### torch.bmm(attn_weights[0].unsqueeze(1),encoder_outputs).squeeze(1) :
        ### [batch size * 1 * hidden_size ]###
        ### attn_applied: [batch size * hidden_size (= 300) ] ###
#         attn_applied = torch.bmm(attn_weights,
#                                  encoder_outputs).squeeze(1)
        ### output: [batch size * hidden_size (= 300) ] ###
        ### embed[0]: [batch size * hidden_size (= 300) ] ###
        attn_weights = F.softmax(attn_prod, dim = 2)
#         print(attn_weights.shape)
        context = torch.bmm(attn_weights, encoder_outputs)
        hc = torch.cat([hidden, context.transpose(0,1)], dim =2)
        out_hc = torch.tanh(self.attn_combine(hc))
        output = self.softmax(self.out(out_hc)[0])
#         output = torch.cat((embed[0], attn_applied), 1)
        ### output: [1 * batch size * hidden_size (= 300) ] ###
#         output = self.attn_combine(output).unsqueeze(0)
        ### output: [1 * batch size * hidden_size (= 300) ] ###
#         output = F.relu(output)
        
        #print(hidden.size())
        #print(output.size())

#         output, hidden = self.gru(output, hidden)
        
#         output = self.softmax(self.out(output[0]))
        
        return output, hidden, attn_weights

    def initHidden(self,batch_size):
        return torch.randn(self.num_layers, batch_size, self.hidden_size,device=device)

In [54]:
teacher_forcing_ratio = 1
#input_tensor: list of sentence tensor
def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer,
          criterion,eee):
    
    ### target_tensor [batch size, max_sentence_length_en = 73] ###
    ### target_tensor [batch size, max_sentence_length_zh = 62] ###
    batch_size_1, input_length = input_tensor.size()
    batch_size_2, target_length = target_tensor.size()
    
    
    encoder_hidden = encoder.initHidden(batch_size_1)

    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    loss = 0

    ### encoder_hidden: 1 * batch * hidden size ### 
    ### encoder_output: batch size * max_sentence_length_zh * hidden size ### 
    encoder_output, encoder_hidden = encoder(input_tensor, encoder_hidden)

    decoder_input = torch.tensor(np.array([[SOS_IDX]]*batch_size_1).reshape(1,batch_size_1),device=device)
    decoder_hidden = encoder_hidden

    use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False
    #print(use_teacher_forcing)
    if use_teacher_forcing:
        # Teacher forcing: Feed the target as the next input
        for di in range(target_length):
            
            ### decoder_output: [batchsize,5000] ###
            decoder_output, decoder_hidden, decoder_attention = decoder(
                decoder_input, decoder_hidden,encoder_output)
        
            
            loss += criterion(decoder_output, target_tensor[:,di])
            decoder_input = target_tensor[:,di].unsqueeze(0)  # Teacher forcing
            
    else:
        # Without teacher forcing: use its own predictions as the next input
        for di in range(target_length):
            decoder_output, decoder_hidden, decoder_attention = decoder(
                decoder_input, decoder_hidden,encoder_output)
                        
            ### decoder_output [batch size, 50003]  ###
            
            ### topi is a [batch size, 1] tensor first we remove the size 1
            ### demension then we add it at the beginning using squeeze
            ### 有点脑残诶，做个转置不就好了？
            
            topv, topi = decoder_output.topk(1)
            decoder_input = topi.squeeze().detach()  # detach from history as input
            
            ### decoder_input [1, batch size]  ###
            decoder_input = decoder_input.unsqueeze(0)
 
            loss += criterion(decoder_output, target_tensor[:,di])

    loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.item() / target_length

In [55]:
from torch.optim.lr_scheduler import StepLR, LambdaLR
def trainIters(encoder, decoder, n_iters, folder,lr_decrease = False,print_every=1, plot_every=100, evaluate_every = 50,read_in_model = False,learning_rate=0.001,early_stop_tol = 10e-7):
    start = time.time()
    plot_losses = []
    plot_val = []
    
    loss_history = []
   
    print_loss_total = 0  # Reset every print_every
    plot_loss_total = 0  # Reset every plot_every
    patience = 0
    
    early_stopped = False
    current_best_bleu = 0
    
    best_encoder = encoder.state_dict()
    best_decoder = decoder.state_dict()
    
    
    #--------------------------------------------	
    #	
    #    LOAD MODELS	
    #	
    #--------------------------------------------	
    	
        
    
    if not os.path.exists(folder):	
        os.makedirs(folder)	

    if read_in_model == True:
        if os.path.exists(folder+'/Encoder'):	
            print('---------------------------------------------------------------------')	
            print('----------------Readind trained model---------------------------------')	
            print('---------------------------------------------------------------------')	

            #read trained models	
            encoder.load_state_dict(torch.load(folder+"/Encoder"))
            decoder.load_state_dict(torch.load(folder+"/Decoder"))	

    encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate)
    
    if lr_decrease == True:
        encoder_scheduler = StepLR(encoder_optimizer, step_size=1, gamma=0.8)
        decoder_scheduler = StepLR(decoder_optimizer, step_size=1, gamma=0.8)
    
    
    criterion = nn.CrossEntropyLoss()
    #criterion_val = nn.CrossEntropyLoss()

    last_val = 0
    for iter in range(1, n_iters + 1):
        if lr_decrease == True:
            encoder_scheduler.step()
            decoder_scheduler.step()
        for i, (data_s1, data_s2, lengths_s1, lengths_s2) in enumerate(train_loader):
            input_tensor = data_s1
            target_tensor = data_s2
            #print("train",target_tensor.size())
            loss = train(input_tensor, target_tensor, encoder,
                         decoder, encoder_optimizer, decoder_optimizer, criterion,i)
            print_loss_total += loss
            plot_loss_total += loss

            if i % print_every == 0:
                if i != 0:
                    print_loss_avg = print_loss_total / print_every
                    print_loss_total = 0
                    print('%s (%d %d%%) %.4f' % (timeSince(start, iter / n_iters),
                                                 iter, iter / n_iters * 100, print_loss_avg))
                    loss_history.append(print_loss_avg)
                else:
                    print_loss_total = 0
                
            if i % plot_every == 0:
                if i != 0:
                    plot_loss_avg = plot_loss_total / plot_every
                    plot_losses.append(plot_loss_avg)
                    plot_loss_total = 0
                    
                    
                else:
                    plot_loss_total = 0
                
            if i % evaluate_every == 0:
                if i != 0:
                    bleu_score,output_words,attentions = evaluate(val_loader, encoder, decoder)
                    if bleu_score > current_best_bleu:
                        current_best_bleu = bleu_score
                        
                        best_encoder = encoder.state_dict()
                        best_decoder = decoder.state_dict()
                        
                    plot_val.append(bleu_score)
                    #print ("BLEU: ",bleu_score)
                    
                    if bleu_score <= current_best_bleu:
                        patience += 1
                        
                    elif bleu_score > current_best_bleu and np.abs(bleu_score - current_best_bleu)/float(current_best_bleu) < early_stop_tol:
                        patience += 1
                    
                    else:
                        patience = 0
                        
                        
                    if patience == 10:
                       
                        torch.save(best_encoder,folder +"/Encoder")
                        torch.save(best_decoder,folder +"/Decoder")
                        early_stopped = True
                        patience = 0
            
                        
                    last_val = bleu_score
                 
        if early_stopped == False:
        
            # Save the model for every epoch
            print('---------------------------------------------------------------------')	
            print('----------------Saving trained model---------------------------------')	
            print('---------------------------------------------------------------------')	

            torch.save(encoder.state_dict(),folder +"/Encoder")
            torch.save(decoder.state_dict(),folder +"/Decoder")
            
    with open(folder+"/loss_hist", 'wb') as f:
         pkl.dump(loss_history, f)
    with open(folder+"/bleu_hist", 'wb') as f:
         pkl.dump(plot_val, f)
    showPlot(plot_losses,title = "Train Loss",name = folder+"/loss.jpeg")
    showPlot(plot_val, title = "BLEU Score on Validation Set",name = folder+"/bleu.jpeg")
    return plot_losses

In [56]:
import matplotlib.pyplot as plt
plt.switch_backend('agg')
import matplotlib.ticker as ticker
import numpy as np
%matplotlib inline

def showPlot(points,title,name):
    plt.figure()
    
    plt.plot(points)
    plt.title(title)
    plt.savefig(name)
    
import time
import math

def asMinutes(s):
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)


def timeSince(since, percent):
    now = time.time()
    s = now - since
    es = s / (percent)
    rs = es - s
    return '%s (- %s)' % (asMinutes(s), asMinutes(rs))


In [90]:
#loader can be test_loader or val_loader
def evaluate(loader, encoder, decoder, after_train_mode = False,beam = False, beam_k = 1):
    bleu_score_list = []
    big_pred_list = []
    big_ref_list = []
    with torch.no_grad():
        for i, (data_s1, data_s2, lengths_s1, lengths_s2) in enumerate(loader):
            input_tensor = data_s1
            input_length = input_tensor.size()[0]
            #sentence_length to the output length
            sentence_length = data_s2.size()[1]
            encoder_hidden = encoder.initHidden(input_length)

            encoder_output, encoder_hidden = encoder(input_tensor, encoder_hidden)
            
            #decoder_input = torch.tensor([[SOS_token]], device=device)  # SOS
            decoder_input = torch.tensor(np.array([[SOS_IDX]]*input_length).reshape(1,input_length),device=device)

            decoder_hidden = encoder_hidden

            decoder_attentions = torch.zeros(sentence_length, sentence_length)
            decoded_words_eval = []
            for di in range(sentence_length):
                decoded_words_sub = []
                decoder_output, decoder_hidden, decoder_attention = decoder(
                    decoder_input, decoder_hidden, encoder_output)
                # decoder_attentions[di] = decoder_attention.data
                # topk(1) - softmax probability maximum
                if beam == True:
                    pass
#                     topv, topi = decoder_output.data.topk(beam_k)
#                     #batch loop
#                     C = []
#                     for idx, ind in enumerate(topi):
#                         H, _ = sequences[idx]
#                         for ele in ind:
#                             if ele.item() == EOS_IDX:
#                                 H.append('<EOS>')
#                             else:
#                                 H.append(idx2words_ft_en[ele.item()])
                         
                else:
                    topv, topi = decoder_output.data.topk(1) 
                    
                #batch loop
                
                
                for ind in topi:
                    
                    if ind.item() == EOS_IDX:
                        
                        decoded_words_sub.append(idx2words_ft_en[EOS_IDX])
                        
                    else:
                        decoded_words_sub.append(idx2words_ft_en[ind.item()])
                    
                
                decoded_words_eval.append(decoded_words_sub)
                
                #swap dimensions of decoded_words to [batch_size * 377]
                
                #decoded_words_new = [[i for i in ele] for ele in list(zip(*decoded_words_eval))]

                #change the dimension
                decoder_input = topi.squeeze().detach()
                decoder_input = decoder_input.unsqueeze(0)
            
            
            pred_num = 0
            listed_predictions = []
            
            
            decoded_words_new = [[i for i in ele] for ele in list(zip(*decoded_words_eval))]
            
            for token_list in decoded_words_new:
                sent = ' '.join(str(token) for token in token_list if token!="<pad>" and token!="<s>" and token!="</s>")
                #print (sent)
                listed_predictions.append(sent)
                #print (sent)
                pred_num += 1
                
            ref_num = 0
            listed_reference = []
            for ele in data_s2:
                sent = index2token_sentence(ele)
                #print (tokens)
                #sent = ' '.join(tokens)
                #print (sent)
                listed_reference.append(sent)
                ref_num += 1
            
            big_pred_list += listed_predictions
            big_ref_list += listed_reference
            
            assert len(big_pred_list) == len(big_ref_list)
            
            
            #uncommon to print prediction and reference
            #print (listed_predictions)
            #print (listed_reference)
        bleu_score = corpus_bleu(big_pred_list,[big_ref_list]).score
        
        if after_train_mode == True:
            for idx,ele in enumerate(big_pred_list):
                print (ele)
                print (big_ref_list[idx])
                print ("\n")
                
                
    print('BLEU Score is %s' % (str(bleu_score)))
        

    return bleu_score, decoded_words_new, decoder_attentions[:di + 1]
    
def index2token_batch(list_of_list):
    return ' '.join(idx2words_ft_en[r.item()] for v in list_of_list for r in v if r.item()!=PAD_IDX)
def index2token_sentence(sentence_batch):
    return ' '.join(idx2words_ft_en[sent.item()] for sent in sentence_batch if sent.item()!=PAD_IDX and sent.item()!=SOS_IDX and sent.item()!=EOS_IDX)

In [120]:
#loader can be test_loader or val_loader
def evaluate_no_unk(loader, original_ref,encoder, decoder, after_train_mode = False,beam = False, beam_k = 1):
    bleu_score_list = []
    big_pred_list = []
    big_ref_list = []
    with torch.no_grad():
        for i, (data_s1, data_s2, lengths_s1, lengths_s2) in enumerate(loader):
            input_tensor = data_s1
            input_length = input_tensor.size()[0]
            #sentence_length to the output length
            sentence_length = data_s2.size()[1]
            encoder_hidden = encoder.initHidden(input_length)
            

            encoder_output, encoder_hidden = encoder(input_tensor, encoder_hidden)
            
            #decoder_input = torch.tensor([[SOS_token]], device=device)  # SOS
            decoder_input = torch.tensor(np.array([[SOS_IDX]]*input_length).reshape(1,input_length),device=device)

            decoder_hidden = encoder_hidden

            decoder_attentions = torch.zeros(sentence_length, sentence_length)
            decoded_words_eval = []
            sequences = [[list(), 1.0]]*input_length
            
            decoder_attentions = torch.zeros(input_length,max_sentence_length_en, max_sentence_length_zh)
            #decoder_attentions = []
            for di in range(sentence_length):
                decoded_words_sub = []
                decoder_output, decoder_hidden, decoder_attention = decoder(
                    decoder_input, decoder_hidden, encoder_output)
                
                decoder_attentions[:,di,:] = decoder_attention.data.squeeze(1)
                
               # decoder_attentions.append()
                # decoder_attentions[di] = decoder_attention.data
                # topk(1) - softmax probability maximum
                if beam == True:
                    pass
#                     topv, topi = decoder_output.data.topk(beam_k)
#                     #batch loop
#                     C = []
#                     for idx, ind in enumerate(topi):
#                         H, _ = sequences[idx]
#                         for ele in ind:
#                             if ele.item() == EOS_IDX:
#                                 H.append('<EOS>')
#                             else:
#                                 H.append(idx2words_ft_en[ele.item()])
                         
                else:
                    topv, topi = decoder_output.data.topk(1) 
                    
                #batch loop
                
                
                for ind in topi:
                    
                    if ind.item() == EOS_IDX:
                        
                        decoded_words_sub.append(idx2words_ft_en[EOS_IDX])
                        
                    else:
                        decoded_words_sub.append(idx2words_ft_en[ind.item()])
                    
                
                decoded_words_eval.append(decoded_words_sub)
                
                #swap dimensions of decoded_words to [batch_size * 377]
                
                #decoded_words_new = [[i for i in ele] for ele in list(zip(*decoded_words_eval))]

                #change the dimension
                decoder_input = topi.squeeze().detach()
                decoder_input = decoder_input.unsqueeze(0)
            
            
            pred_num = 0
            listed_predictions = []
            
            
            decoded_words_new = [[i for i in ele] for ele in list(zip(*decoded_words_eval))]
            
            for token_list in decoded_words_new:
                sent = ' '.join(str(token) for token in token_list if token!="<pad>" and token!="<s>" and token!="</s>")
                #print (sent)
                listed_predictions.append(sent)
                #print (sent)
                pred_num += 1
                
            ref_num = 0
            listed_reference = []
            for ele in data_s2:
                sent = index2token_sentence(ele)
                #print (tokens)
                #sent = ' '.join(tokens)
                #print (sent)
                listed_reference.append(sent)
                ref_num += 1
            
            big_pred_list += listed_predictions
            big_ref_list += listed_reference
            
            assert len(big_pred_list) == len(big_ref_list)
#             print(big_pred_list)
#             print(big_ref_list)
            
            #uncommon to print prediction and reference
            #print (listed_predictions)
            #print (listed_reference)
#         print(len(big_pred_list))
#         print(len(original_ref))
        bleu_score = corpus_bleu(big_pred_list,[original_ref]).score
        
        if after_train_mode == True:
            for idx,ele in enumerate(big_pred_list):
                print (ele)
                print (original_ref[idx])
                print ("\n")
                
                
    print('BLEU Score is %s' % (str(bleu_score)))
    
    

    return bleu_score, decoded_words_new, decoder_attentions
    
def index2token_batch(list_of_list):
    return ' '.join(idx2words_ft_en[r.item()] for v in list_of_list for r in v if r.item()!=PAD_IDX)
def index2token_sentence(sentence_batch):
    return ' '.join(idx2words_ft_en[sent.item()] for sent in sentence_batch if sent.item()!=PAD_IDX and sent.item()!=SOS_IDX and sent.item()!=EOS_IDX)

In [61]:
folder = '/scratch/yc2462/GRU_LR001_CONST_H200_ES20_zh'
hidden_size = 200
encoder1 = EncoderRNN(EMBEDDING_SIZE,hidden_size).to(device)
decoder1 = AttnDecoderRNN(EMBEDDING_SIZE,hidden_size, len(ordedred_words_ft_en)).to(device)
encoder1.load_state_dict(torch.load(folder+"/Encoder"))
decoder1.load_state_dict(torch.load(folder+"/Decoder"))

In [123]:
evaluate_no_unk(val_loader, lines_en_val, encoder1, decoder1, after_train_mode = False,beam = False, beam_k = 1)

['I remember 11 years old , and remember , the night I heard her house was a pleasure .', 'My father was using his gray radio news .', 'He had a smile , and it was very rare because most of the news was just going to make him feel .', 'The Taliban were walking , and they were called &quot; The Party . &quot;', 'I don &apos;t know what that means , but I can tell you , really , really happy .', 'You can go to a real school , and he says .', 'I never forget the morning .', 'It &apos;s really a school .', 'I was six years old , and the Taliban was a legal occupation and the girl went to school , and the press was illegal .', 'So , in five years , I was living with my sister , and I went to a secret school , and I was never allowed to be alone .', 'This is the only thing we have to do for education .', 'We &apos;re going to go on a different path , and we don &apos;t have any doubts where we go .', 'We put books on the food grocery store , and we think we just want to go shopping .', 'The 

['I remember 11 years old , and remember , the night I heard her house was a pleasure .', 'My father was using his gray radio news .', 'He had a smile , and it was very rare because most of the news was just going to make him feel .', 'The Taliban were walking , and they were called &quot; The Party . &quot;', 'I don &apos;t know what that means , but I can tell you , really , really happy .', 'You can go to a real school , and he says .', 'I never forget the morning .', 'It &apos;s really a school .', 'I was six years old , and the Taliban was a legal occupation and the girl went to school , and the press was illegal .', 'So , in five years , I was living with my sister , and I went to a secret school , and I was never allowed to be alone .', 'This is the only thing we have to do for education .', 'We &apos;re going to go on a different path , and we don &apos;t have any doubts where we go .', 'We put books on the food grocery store , and we think we just want to go shopping .', 'The 

['I remember 11 years old , and remember , the night I heard her house was a pleasure .', 'My father was using his gray radio news .', 'He had a smile , and it was very rare because most of the news was just going to make him feel .', 'The Taliban were walking , and they were called &quot; The Party . &quot;', 'I don &apos;t know what that means , but I can tell you , really , really happy .', 'You can go to a real school , and he says .', 'I never forget the morning .', 'It &apos;s really a school .', 'I was six years old , and the Taliban was a legal occupation and the girl went to school , and the press was illegal .', 'So , in five years , I was living with my sister , and I went to a secret school , and I was never allowed to be alone .', 'This is the only thing we have to do for education .', 'We &apos;re going to go on a different path , and we don &apos;t have any doubts where we go .', 'We put books on the food grocery store , and we think we just want to go shopping .', 'The 

['I remember 11 years old , and remember , the night I heard her house was a pleasure .', 'My father was using his gray radio news .', 'He had a smile , and it was very rare because most of the news was just going to make him feel .', 'The Taliban were walking , and they were called &quot; The Party . &quot;', 'I don &apos;t know what that means , but I can tell you , really , really happy .', 'You can go to a real school , and he says .', 'I never forget the morning .', 'It &apos;s really a school .', 'I was six years old , and the Taliban was a legal occupation and the girl went to school , and the press was illegal .', 'So , in five years , I was living with my sister , and I went to a secret school , and I was never allowed to be alone .', 'This is the only thing we have to do for education .', 'We &apos;re going to go on a different path , and we don &apos;t have any doubts where we go .', 'We put books on the food grocery store , and we think we just want to go shopping .', 'The 

['I remember 11 years old , and remember , the night I heard her house was a pleasure .', 'My father was using his gray radio news .', 'He had a smile , and it was very rare because most of the news was just going to make him feel .', 'The Taliban were walking , and they were called &quot; The Party . &quot;', 'I don &apos;t know what that means , but I can tell you , really , really happy .', 'You can go to a real school , and he says .', 'I never forget the morning .', 'It &apos;s really a school .', 'I was six years old , and the Taliban was a legal occupation and the girl went to school , and the press was illegal .', 'So , in five years , I was living with my sister , and I went to a secret school , and I was never allowed to be alone .', 'This is the only thing we have to do for education .', 'We &apos;re going to go on a different path , and we don &apos;t have any doubts where we go .', 'We put books on the food grocery store , and we think we just want to go shopping .', 'The 

['I remember 11 years old , and remember , the night I heard her house was a pleasure .', 'My father was using his gray radio news .', 'He had a smile , and it was very rare because most of the news was just going to make him feel .', 'The Taliban were walking , and they were called &quot; The Party . &quot;', 'I don &apos;t know what that means , but I can tell you , really , really happy .', 'You can go to a real school , and he says .', 'I never forget the morning .', 'It &apos;s really a school .', 'I was six years old , and the Taliban was a legal occupation and the girl went to school , and the press was illegal .', 'So , in five years , I was living with my sister , and I went to a secret school , and I was never allowed to be alone .', 'This is the only thing we have to do for education .', 'We &apos;re going to go on a different path , and we don &apos;t have any doubts where we go .', 'We put books on the food grocery store , and we think we just want to go shopping .', 'The 

['I remember 11 years old , and remember , the night I heard her house was a pleasure .', 'My father was using his gray radio news .', 'He had a smile , and it was very rare because most of the news was just going to make him feel .', 'The Taliban were walking , and they were called &quot; The Party . &quot;', 'I don &apos;t know what that means , but I can tell you , really , really happy .', 'You can go to a real school , and he says .', 'I never forget the morning .', 'It &apos;s really a school .', 'I was six years old , and the Taliban was a legal occupation and the girl went to school , and the press was illegal .', 'So , in five years , I was living with my sister , and I went to a secret school , and I was never allowed to be alone .', 'This is the only thing we have to do for education .', 'We &apos;re going to go on a different path , and we don &apos;t have any doubts where we go .', 'We put books on the food grocery store , and we think we just want to go shopping .', 'The 

['I remember 11 years old , and remember , the night I heard her house was a pleasure .', 'My father was using his gray radio news .', 'He had a smile , and it was very rare because most of the news was just going to make him feel .', 'The Taliban were walking , and they were called &quot; The Party . &quot;', 'I don &apos;t know what that means , but I can tell you , really , really happy .', 'You can go to a real school , and he says .', 'I never forget the morning .', 'It &apos;s really a school .', 'I was six years old , and the Taliban was a legal occupation and the girl went to school , and the press was illegal .', 'So , in five years , I was living with my sister , and I went to a secret school , and I was never allowed to be alone .', 'This is the only thing we have to do for education .', 'We &apos;re going to go on a different path , and we don &apos;t have any doubts where we go .', 'We put books on the food grocery store , and we think we just want to go shopping .', 'The 

['I remember 11 years old , and remember , the night I heard her house was a pleasure .', 'My father was using his gray radio news .', 'He had a smile , and it was very rare because most of the news was just going to make him feel .', 'The Taliban were walking , and they were called &quot; The Party . &quot;', 'I don &apos;t know what that means , but I can tell you , really , really happy .', 'You can go to a real school , and he says .', 'I never forget the morning .', 'It &apos;s really a school .', 'I was six years old , and the Taliban was a legal occupation and the girl went to school , and the press was illegal .', 'So , in five years , I was living with my sister , and I went to a secret school , and I was never allowed to be alone .', 'This is the only thing we have to do for education .', 'We &apos;re going to go on a different path , and we don &apos;t have any doubts where we go .', 'We put books on the food grocery store , and we think we just want to go shopping .', 'The 

['I remember 11 years old , and remember , the night I heard her house was a pleasure .', 'My father was using his gray radio news .', 'He had a smile , and it was very rare because most of the news was just going to make him feel .', 'The Taliban were walking , and they were called &quot; The Party . &quot;', 'I don &apos;t know what that means , but I can tell you , really , really happy .', 'You can go to a real school , and he says .', 'I never forget the morning .', 'It &apos;s really a school .', 'I was six years old , and the Taliban was a legal occupation and the girl went to school , and the press was illegal .', 'So , in five years , I was living with my sister , and I went to a secret school , and I was never allowed to be alone .', 'This is the only thing we have to do for education .', 'We &apos;re going to go on a different path , and we don &apos;t have any doubts where we go .', 'We put books on the food grocery store , and we think we just want to go shopping .', 'The 

['I remember 11 years old , and remember , the night I heard her house was a pleasure .', 'My father was using his gray radio news .', 'He had a smile , and it was very rare because most of the news was just going to make him feel .', 'The Taliban were walking , and they were called &quot; The Party . &quot;', 'I don &apos;t know what that means , but I can tell you , really , really happy .', 'You can go to a real school , and he says .', 'I never forget the morning .', 'It &apos;s really a school .', 'I was six years old , and the Taliban was a legal occupation and the girl went to school , and the press was illegal .', 'So , in five years , I was living with my sister , and I went to a secret school , and I was never allowed to be alone .', 'This is the only thing we have to do for education .', 'We &apos;re going to go on a different path , and we don &apos;t have any doubts where we go .', 'We put books on the food grocery store , and we think we just want to go shopping .', 'The 



























1261
1261




BLEU Score is 13.051154308277097


(13.051154308277097,
 [['<s>',
   'This',
   'is',
   'our',
   '<unk>',
   'time',
   ',',
   'and',
   'the',
   'most',
   'valuable',
   'time',
   'we',
   'have',
   ',',
   'we',
   '&apos;re',
   'the',
   'most',
   'valued',
   'and',
   'the',
   'most',
   'expected',
   'time',
   '.',
   '</s>',
   '<pad>',
   '<pad>',
   '<pad>',
   '<pad>',
   '<pad>',
   '<pad>',
   '<pad>',
   '<pad>',
   '<pad>',
   '<pad>',
   '<pad>',
   '<pad>',
   '<pad>',
   '<pad>',
   '<pad>',
   '<pad>',
   '<pad>',
   '<pad>',
   '<pad>',
   '<pad>',
   '<pad>',
   '<pad>',
   '<pad>',
   '<pad>',
   '<pad>',
   '<pad>',
   '<pad>',
   '<pad>',
   '<pad>',
   '<pad>',
   '<pad>',
   '<pad>',
   '<pad>',
   '<pad>',
   '<pad>',
   '<pad>',
   '<pad>',
   '<pad>',
   '<pad>',
   '<pad>',
   '<pad>',
   '<pad>',
   '<pad>',
   '<pad>',
   '<pad>',
   '<pad>',
   '<pad>'],
  ['<s>',
   'Recently',
   ',',
   'we',
   'walked',
   'up',
   'to',
   'her',
   'journey',
   ',',
   'and',
   'she',

In [94]:
ct = 0
for i, (data_s1, data_s2, lengths_s1, lengths_s2) in enumerate(val_loader):
    ct+=data_s1.shape[0]

In [98]:
len(lines_en_val)

1261

In [None]:
class decoder_output_node:
    def __init__(self,parent, word_idx, prob_sum, isroot=False):
        self.parent = parent
        self.isroot = isroot
        self.children = []
        self.word_idx = word_idx
        self.prob_sum = prob_sum
    
    def get_children(self):
        '''
        return children
        '''
        return self.children
    
    def add_children(self, child):
        '''
        child: node
        '''
        self.children.append(child)
        return
    
    def get_parent(self):
        '''
        get parent of children
        '''
        return self.parent
    
    def get_word_idx(self):
        
        return self.word_idx
    
    def get_prob_sum(self):
        
        return self.prob_sum
    
    def is_root(self):
        return self.isroot


In [None]:
def return_sentence_sequence(child_node):
    if child_node.is_root():
        return [child_node.get_word_idx()]
    
    return return_sentence_sequence(child_node.get_parent())+[child_node.get_word_idx()]

In [None]:
def beam_search(beam_k, decoder_output, prob_sum = None, parent_node_list=None, vocab_size = len(idx2words_ft_en)):
    '''
    params:
    beam_k
    decoder_output: previous round decoder output
    parent_node_list: previous candidate word list (for only one candidate)
    
    return:
    list_of_best_k_nodes: best k nodes found in this iteration, list of list, first dim batch, second dim best k
    prob_with_sum: probabilistic matrix after sum+sortee 
    '''
    # if first word
    if parent_node_list is None:
        # initialize result
        prob_with_sum_sorted, word_idx_sorted = decoder_output.data.topk(beam_k)
        #print("ps",prob_with_sum_sorted)
        # add initialize tree list
        list_of_best_k_nodes = []
        batchsize = prob_with_sum_sorted.shape[0]
        for batch_i in range(batchsize):
            batch_i_tree_list = []
            for beam_i in range(beam_k):
                # add tree root node to list
                batch_i_tree_list.append(decoder_output_node(parent=None, word_idx= word_idx_sorted[batch_i, beam_i].item(), 
                                                            prob_sum= prob_with_sum_sorted[batch_i, beam_i].item(), isroot=True))
                
            list_of_best_k_nodes.append(batch_i_tree_list)
   
    # if not first word
    else:
        # get sorted results for all outputs
        prob = decoder_output.data
        #print(decoder_output.data.shape)
        #print(word_idx)
        
        
        # find top beam k words options
        #print("sum:",prob_sum)
        #print("curr prob:",prob)
        #print("sum:",prob+prob_sum)
        #print("in beam",prob_sum[0,:])
        
        prob_with_sum = prob+prob_sum
        #print("in beam prob", max(prob[0,:]))
        
        prob_with_sum_sorted, word_idx_sorted = torch.sort(prob_with_sum, dim=1, descending=True)
        #print("in beam prob max", prob_with_sum_sorted[0,0])
        #print("in beam prob second max", prob_with_sum_sorted[0,1])
        #print("sum sorted:", prob_with_sum_sorted)
        # add top beam k words options into tree
        batchsize = prob_with_sum_sorted.shape[0]
        
        list_of_best_k_nodes = []
        for batch_i in range(batchsize):
            batch_i_tree_list = []
            for beam_i in range(beam_k):
                #print(word_idx_sorted[batch_i, beam_i])
                #print(parent_node_list[batch_i].get_word_idx())
                child_node = decoder_output_node(parent=parent_node_list[batch_i], word_idx= word_idx_sorted[batch_i,beam_i].item(), prob_sum=prob_with_sum_sorted[batch_i,beam_i].item())
                
                # update parent node's child
                parent_node_list[batch_i].add_children(child_node)
                #save child to new list
                batch_i_tree_list.append(child_node)
            # add batch tree list to best k
            list_of_best_k_nodes.append(batch_i_tree_list)
                
    return list_of_best_k_nodes, prob_with_sum_sorted[:,:beam_k], word_idx_sorted[:,:beam_k]



In [None]:
def evaluate_with_beam_search(val_loader,original_ref,encoder1,decoder1,beam_k = 5, threshold_p=3):
    big_pred_list = []
    big_ref_list = []
    #beam_k = 5
    with torch.no_grad():
        #predictions = ''
        #references = ''
        for i, (data_s1, data_s2, lengths_s1, lengths_s2) in enumerate(val_loader):
            #print(i)
            input_tensor = data_s1
            input_length = input_tensor.size()[0]
            #sentence_length to the output length
            sentence_length = data_s2.size()[1]
            #print(sentence_length)
            encoder_hidden = encoder1.initHidden(input_length)

            encoder_output, encoder_hidden = encoder1(input_tensor, encoder_hidden)

            #decoder_input = torch.tensor([[SOS_token]], device=device)  # SOS
            decoder_input = torch.tensor(np.array([[SOS_IDX]]*input_length).reshape(1,input_length),device=device)

            decoder_hidden = encoder_hidden

            decoder_attentions = torch.zeros(sentence_length, sentence_length)
            #decoded_words_eval = []
            list_of_best_k_nodes = []

            prob_with_sum_sorted = []
            #print("outside",prob_with_sum_sorted)

            decoder_hidden_list = []
            #decoder_cell_list = []
            
            # final candidate list
            ## batch_size lists in each list
            final_candidate_list = []
            for i in range(input_length):
                final_candidate_list.append([])
            num_of_can = [0]*input_length
            num_of_batch_reach_thre = 0
            #decoder_attentions = torch.zeros(input_length,max_sentence_length_en, max_sentence_length_zh)
            for di in range(sentence_length):

                ############################################beam search###################################################
                #print(di)
                if di == 0:
                    #decoded_words_sub = []


                    decoder_output, decoder_hidden, decoder_attention = decoder1(
                                decoder_input, decoder_hidden,encoder_output)

                    # find top k candidates
                    list_of_best_k_nodes,prob_with_sum_sorted ,word_idx_sorted = beam_search(beam_k, decoder_output, parent_node_list=None)
                    
                    for idx in range(beam_k):
                        decoder_hidden_list.append(copy.deepcopy(decoder_hidden.data))
                        
                    #for idx in range(beam_k):
                    #    decoder_cell_list.append(copy.deepcopy(decoder_hidden[1].data))
                    #decoder_hidden_list = [copy.deepcopy(decoder_hidden.data)]*beam_k

                    #print("sum1",prob_with_sum_sorted)
                    #print("idx",word_idx_sorted)
                    #print(list_of_best_k_nodes[0][0].get_word_idx())
                    #print(list_of_best_k_nodes[0][1].get_word_idx())

                else:
                    # keep track of all new nodes
                    new_nodes = []
                    nodes_prob = None
                    #nodes_word_idx = None

                    # store index in previous candidate to locate position in new nodes, repeats=beam_size*beam_size
                    prev_candidate_idx = np.repeat(range(beam_k), repeats=beam_k)

                    # iterate through each node candidate from last iterations to find new candidates
                    new_decoder_hidden_list = []
                    #new_decoder_cell_list = []
                    #print('di ', di)
                    #print('first beam', prob_with_sum_sorted[0,0])
                    #print('second beam', prob_with_sum_sorted[0,1])
                    for beam_i in range(beam_k):
                        #print(word_idx_sorted.shape)
                        topi = word_idx_sorted[:,beam_i].data
                        #print("idx i",topi)

                        prob_sum = prob_with_sum_sorted[:,beam_i].view((input_length,1))
                        #print("prob sum:", prob_sum[0,0])
                        #change the dimension
                        decoder_input = topi.squeeze().detach()
                        decoder_input = decoder_input.unsqueeze(0)

                        # get decoder output
                        decoder_output, decoder_hidden_i, decoder_attention = decoder1(
                                        decoder_input, decoder_hidden_list[beam_i],encoder_output)
                        
                        new_decoder_hidden_list.append(copy.deepcopy(decoder_hidden_i.data))
                        #new_decoder_cell_list.append(copy.deepcopy(decoder_hidden_i[1].data))

                        # get beam search output
                        best_k_curr_node, prob_sum_curr_node, _ = beam_search(beam_k, decoder_output, prob_sum=prob_sum, parent_node_list=[ls[beam_i] for ls in list_of_best_k_nodes])
                        #print(word_idx_curr_node)
                        #print("current max first beam", prob_sum_curr_node[0,0])
                        #print("current max second beam", prob_sum_curr_node[0,1])
                        
                        # keep track of beam search output
                        new_nodes.append(best_k_curr_node)

                        if beam_i == 0:
                            nodes_prob = prob_sum_curr_node.data
                            #nodes_word_idx = word_idx_curr_node
                        else:
                            nodes_prob = torch.cat((nodes_prob, prob_sum_curr_node.data),dim=1)
                            #nodes_word_idx = torch.cat((nodes_word_idx, word_idx_curr_node),dim=1)

                    #print("nodes prob", nodes_prob)
                    _, sorted_idx = torch.sort(nodes_prob, dim=1, descending=True)
                    #print("nodes prob", nodes_prob[0,sorted_idx[0,0]].item())
                    #print("length",nodes_prob.shape)
                    #print(nodes_prob)
                    #print(sorted_idx)

                    #print(prev_candidate_idx)
                    #print("new nodes len:", len(new_nodes[0][0]))
                    #print("new_nodes 0",new_nodes[0])
                    #print("new_nodes 1",new_nodes[1])
                    # update 
                    #print(sorted_idx.shape)
                    for batch_i in range(input_length):
                        for beam_i in range(beam_k):
                            # find the index of which candidate it descended from
                            st_idx = sorted_idx[batch_i][beam_i].item()
                            #if batch_i == 0:
                            #    print("st_idx", prev_candidate_idx[st_idx])
                            #    print(new_decoder_hidden_list[prev_candidate_idx[st_idx]][:,batch_i,:])
                            #print("st_idx", prev_candidate_idx[st_idx])
                            # find the corresponding node, st_idx gives parent node id, batch_i gives which example, st_idx%beam_k gives which node in the existing node list
                            #if batch_i == 0:
                            #print("st_idx",st_idx)
                            update_node = new_nodes[prev_candidate_idx[st_idx]][batch_i][st_idx%beam_k]

                            list_of_best_k_nodes[batch_i][beam_i] = update_node
                            #print(batch_i)
                            #print(beam_i)
                            #print(list_of_best_k_nodes[0][0].parent.get_word_idx())

                            # update word idex, prob sum correspondingly for next iteration
                            #word_idx_sorted[batch_i][beam_i] = nodes_word_idx[batch_i][st_idx] 
                            word_idx_sorted[batch_i,beam_i] = update_node.get_word_idx()
                            prob_with_sum_sorted[batch_i,beam_i] = update_node.get_prob_sum()
                            decoder_hidden_list[beam_i][:,batch_i,:] = new_decoder_hidden_list[prev_candidate_idx[st_idx]][:,batch_i,:]
                            #decoder_cell_list[beam_i][:,batch_i,:] = new_decoder_cell_list[prev_candidate_idx[st_idx]][:,batch_i,:]
                            
                            if (update_node.get_word_idx() == EOS_IDX):
                                prob_with_sum_sorted[batch_i,beam_i] = float("-inf")
                                update_node.prob_sum = update_node.get_prob_sum()/(di+1)
                                final_candidate_list[batch_i].append(update_node)
                                num_of_can[batch_i]+=1
                                if num_of_can[batch_i] == threshold_p:
                                    num_of_batch_reach_thre += 1
                    
                    if num_of_batch_reach_thre == input_length:
                        break
                                
                    #print(list_of_best_k_nodes[0][0].get_prob_sum())
                    #print(list_of_best_k_nodes[0][0].get_word_idx())
                    #print(word_idx_sorted[0,0])
                    #print(decoder_hidden_list[1])
                    #print(decoder_hidden_list[0])
                    #print("di", di)
                    #print("best k",list_of_best_k_nodes[0])
                    #print("final", prob_with_sum_sorted)
                    #print("idx final", word_idx_sorted)
            
            #print(final_candidate_list[0][0])
            #print(final_candidate_list[1][0])
            #print(num_of_can)
            # find the best and get index
            listed_predictions = []
            for batch_i in range(input_length):
                if num_of_can[batch_i] >= threshold_p:
                    best_sequence_last_node = max(final_candidate_list[batch_i], key=lambda c: c.get_prob_sum())
                else:
                    best_sequence_last_node_noneos = None
                    for sub_i in range(beam_k):
                        best_sequence_last_node_noneos_i = list_of_best_k_nodes[batch_i][sub_i]
                        if best_sequence_last_node_noneos_i.get_word_idx() != EOS_IDX:
                            best_sequence_last_node_noneos_i.prob_sum = best_sequence_last_node_noneos_i.get_prob_sum()/sentence_length
                            best_sequence_last_node_noneos = best_sequence_last_node_noneos_i
                            break
                    if best_sequence_last_node_noneos is None:
                        raise ValueError("something is wrong")
                    
                    best_sequence_last_node = max(final_candidate_list[batch_i]+[best_sequence_last_node_noneos], key=lambda c: c.get_prob_sum())
                    
                #print("batch i ", batch_i)
                #print("best sequence",best_sequence_last_node)
                #print("next best sequence",list_of_best_k_nodes[batch_i][1].get_prob_sum())
                batch_i_word_idx = return_sentence_sequence(best_sequence_last_node)
                
                try:
                    first_ed_idx = batch_i_word_idx.index(EOS_IDX)
                except:
                    first_ed_idx = len(batch_i_word_idx)
                
                listed_predictions.append(' '.join(idx2words_ft_en[token_idx] for token_idx in batch_i_word_idx[:first_ed_idx] if (token_idx!=PAD_IDX and token_idx!=SOS_IDX and token_idx!=EOS_IDX)))
                #print(' '.join(idx2words_ft_en[token_idx] for token_idx in batch_i_word_idx ))
                #print(batch_i_word_idx)
                #print (listed_predictions)
            listed_reference = []
            for ele in data_s2:
                sent = index2token_sentence(ele)

                listed_reference.append(sent)
                #print ("\n")
                #print (sent)

            #print(listed_predictions)
            #bleu_score = corpus_bleu(listed_predictions,[listed_reference])
            #print('BLEU Score is %s' % (str(bleu_score.score)))

            big_pred_list += listed_predictions
            big_ref_list += listed_reference
            
            for idx,ele in enumerate(big_pred_list):
                print (ele)
                print (original_ref[idx])
                print ("\n")
                
            #break
            
    bleu_score = corpus_bleu(big_pred_list,[original_ref])
    print('BLEU Score is %s' % (str(bleu_score.score)))
            ############################################beam search###################################################
    return bleu_score

In [None]:
evaluate_with_beam_search(val_loader,lines_en_val,encoder1,decoder1,beam_k = 4, threshold_p=4)

In [None]:
hidden_size = 200
encoder1 = EncoderRNN(EMBEDDING_SIZE,hidden_size).to(device)
decoder1 = AttnDecoderRNN(EMBEDDING_SIZE,hidden_size, len(ordered_words_ft_en)).to(device)
epoch_size = 20
folder = 'GRU_LR005_decay_H200_ES20'
##UNCOMMENT TO TRAIN THE MODEL
trainIters(encoder1, decoder1, epoch_size, folder ,lr_decrease = True,print_every=50,plot_every = 100, evaluate_every = 250,learning_rate=0.005)

1m 20s (- 25m 29s) (1 5%) 2.3886
2m 39s (- 50m 28s) (1 5%) 1.8313
3m 58s (- 75m 27s) (1 5%) 1.7251
5m 17s (- 100m 26s) (1 5%) 1.6986
6m 36s (- 125m 25s) (1 5%) 1.7282




BLEU Score is 3.6583089830224207
8m 11s (- 155m 41s) (1 5%) 1.6468
9m 30s (- 180m 41s) (1 5%) 1.5996
10m 49s (- 205m 41s) (1 5%) 1.6232
12m 8s (- 230m 41s) (1 5%) 1.6190
13m 27s (- 255m 41s) (1 5%) 1.6218




BLEU Score is 3.603796338835871
15m 3s (- 286m 8s) (1 5%) 1.5904
16m 22s (- 311m 8s) (1 5%) 1.5467
17m 41s (- 336m 7s) (1 5%) 1.5542
19m 0s (- 361m 7s) (1 5%) 1.5194
20m 19s (- 386m 7s) (1 5%) 1.5294




BLEU Score is 1.9247031266697028
21m 55s (- 416m 38s) (1 5%) 1.5158
23m 14s (- 441m 38s) (1 5%) 1.5019
24m 33s (- 466m 38s) (1 5%) 1.4887
25m 52s (- 491m 38s) (1 5%) 1.4559
27m 11s (- 516m 38s) (1 5%) 1.4580




BLEU Score is 3.440815374037176
28m 47s (- 547m 6s) (1 5%) 1.4529
30m 6s (- 572m 5s) (1 5%) 1.4718
31m 25s (- 597m 5s) (1 5%) 1.4155
32m 44s (- 622m 5s) (1 5%) 1.4144
34m 3s (- 647m 5s) (1 5%) 1.4213




BLEU Score is 4.485316400564601
35m 39s (- 677m 26s) (1 5%) 1.4134
36m 58s (- 702m 26s) (1 5%) 1.4099
38m 17s (- 727m 26s) (1 5%) 1.3963
39m 36s (- 752m 26s) (1 5%) 1.4004
40m 55s (- 777m 27s) (1 5%) 1.3959




BLEU Score is 3.3100558327265346
42m 31s (- 807m 49s) (1 5%) 1.3931
43m 49s (- 832m 49s) (1 5%) 1.3746
45m 8s (- 857m 49s) (1 5%) 1.4131
46m 27s (- 882m 48s) (1 5%) 1.3809
47m 46s (- 907m 47s) (1 5%) 1.4008




BLEU Score is 4.601102499723088
49m 22s (- 938m 12s) (1 5%) 1.3923
50m 41s (- 963m 12s) (1 5%) 1.3367
52m 0s (- 988m 13s) (1 5%) 1.3931
53m 19s (- 1013m 13s) (1 5%) 1.3344
54m 38s (- 1038m 13s) (1 5%) 1.3555




BLEU Score is 4.966566277666917
56m 14s (- 1068m 38s) (1 5%) 1.3793
57m 33s (- 1093m 38s) (1 5%) 1.4092
58m 52s (- 1118m 39s) (1 5%) 1.3522
60m 11s (- 1143m 40s) (1 5%) 1.3631
61m 30s (- 1168m 41s) (1 5%) 1.3485




BLEU Score is 4.591230949212597
63m 6s (- 1199m 5s) (1 5%) 1.4000
64m 25s (- 1224m 5s) (1 5%) 1.3559
65m 44s (- 1249m 6s) (1 5%) 1.3584
67m 3s (- 1274m 6s) (1 5%) 1.3698
68m 22s (- 1299m 7s) (1 5%) 1.3923




BLEU Score is 5.38867492677869
69m 58s (- 1329m 36s) (1 5%) 1.3724
71m 17s (- 1354m 36s) (1 5%) 1.3298
72m 36s (- 1379m 36s) (1 5%) 1.3791
73m 55s (- 1404m 36s) (1 5%) 1.3786
75m 14s (- 1429m 36s) (1 5%) 1.3662




BLEU Score is 3.9101886264873937
76m 50s (- 1460m 1s) (1 5%) 1.3726
78m 9s (- 1485m 1s) (1 5%) 1.3332
79m 28s (- 1510m 2s) (1 5%) 1.3363
80m 47s (- 1535m 2s) (1 5%) 1.3928
82m 6s (- 1560m 2s) (1 5%) 1.3506




BLEU Score is 4.903363054785668
83m 42s (- 1590m 25s) (1 5%) 1.3544
85m 1s (- 1615m 26s) (1 5%) 1.3771
86m 20s (- 1640m 26s) (1 5%) 1.3586
87m 39s (- 1665m 26s) (1 5%) 1.3858
88m 58s (- 1690m 26s) (1 5%) 1.3390




BLEU Score is 4.903806339214065
90m 34s (- 1720m 52s) (1 5%) 1.3750
91m 53s (- 1745m 53s) (1 5%) 1.3555
93m 12s (- 1770m 53s) (1 5%) 1.3435
94m 31s (- 1795m 54s) (1 5%) 1.3527
95m 50s (- 1820m 55s) (1 5%) 1.3591




BLEU Score is 4.9597928735611605
97m 26s (- 1851m 20s) (1 5%) 1.3249
98m 45s (- 1876m 20s) (1 5%) 1.3251
100m 4s (- 1901m 21s) (1 5%) 1.3334
101m 23s (- 1926m 21s) (1 5%) 1.3405
102m 42s (- 1951m 21s) (1 5%) 1.3272




BLEU Score is 5.264438162655568
104m 18s (- 1981m 48s) (1 5%) 1.3149
105m 37s (- 2006m 48s) (1 5%) 1.3445
106m 56s (- 2031m 49s) (1 5%) 1.3437
108m 15s (- 2056m 49s) (1 5%) 1.3224
109m 34s (- 2081m 50s) (1 5%) 1.3451




BLEU Score is 5.2975284422504
111m 9s (- 2112m 7s) (1 5%) 1.3588
112m 28s (- 2137m 7s) (1 5%) 1.3463
113m 47s (- 2162m 8s) (1 5%) 1.3330
115m 6s (- 2187m 8s) (1 5%) 1.3435
116m 25s (- 2212m 8s) (1 5%) 1.3450




BLEU Score is 5.794581713506847
118m 29s (- 1066m 27s) (2 10%) 1.2645
119m 48s (- 1078m 18s) (2 10%) 1.2492
121m 7s (- 1090m 8s) (2 10%) 1.2684
122m 26s (- 1101m 59s) (2 10%) 1.2783
123m 45s (- 1113m 50s) (2 10%) 1.2696




BLEU Score is 5.386534628301124
125m 21s (- 1128m 13s) (2 10%) 1.2515
126m 40s (- 1140m 4s) (2 10%) 1.2640
127m 59s (- 1151m 55s) (2 10%) 1.2629
129m 18s (- 1163m 46s) (2 10%) 1.2425
130m 37s (- 1175m 37s) (2 10%) 1.2499




BLEU Score is 4.8915961374531935
132m 13s (- 1190m 0s) (2 10%) 1.2422
133m 32s (- 1201m 51s) (2 10%) 1.2657
134m 51s (- 1213m 42s) (2 10%) 1.2243
136m 10s (- 1225m 33s) (2 10%) 1.2430
137m 29s (- 1237m 24s) (2 10%) 1.2455




BLEU Score is 5.522007724201414
139m 6s (- 1251m 55s) (2 10%) 1.2779
140m 25s (- 1263m 46s) (2 10%) 1.2637
141m 44s (- 1275m 37s) (2 10%) 1.2513
143m 3s (- 1287m 28s) (2 10%) 1.2508
144m 22s (- 1299m 19s) (2 10%) 1.2449




BLEU Score is 5.041514639753127
145m 58s (- 1313m 42s) (2 10%) 1.2281
147m 16s (- 1325m 32s) (2 10%) 1.2594
148m 35s (- 1337m 23s) (2 10%) 1.2400
149m 54s (- 1349m 14s) (2 10%) 1.2628
151m 13s (- 1361m 5s) (2 10%) 1.2597




BLEU Score is 5.538064249548563
152m 49s (- 1375m 28s) (2 10%) 1.2383
154m 8s (- 1387m 19s) (2 10%) 1.2714
155m 27s (- 1399m 10s) (2 10%) 1.2723
156m 46s (- 1411m 1s) (2 10%) 1.2340
158m 5s (- 1422m 53s) (2 10%) 1.2180




BLEU Score is 5.25849218730481
159m 41s (- 1437m 16s) (2 10%) 1.2710
161m 0s (- 1449m 6s) (2 10%) 1.2512
162m 19s (- 1460m 57s) (2 10%) 1.2801
163m 38s (- 1472m 48s) (2 10%) 1.2248
164m 57s (- 1484m 39s) (2 10%) 1.2509




BLEU Score is 5.114181058967666
166m 33s (- 1499m 4s) (2 10%) 1.2357
167m 52s (- 1510m 55s) (2 10%) 1.2757
169m 11s (- 1522m 45s) (2 10%) 1.2321
170m 30s (- 1534m 36s) (2 10%) 1.2332
171m 49s (- 1546m 27s) (2 10%) 1.2442




BLEU Score is 4.921758016013677
173m 25s (- 1560m 50s) (2 10%) 1.2317
174m 44s (- 1572m 41s) (2 10%) 1.2379
176m 3s (- 1584m 32s) (2 10%) 1.2541
177m 22s (- 1596m 23s) (2 10%) 1.2601
178m 41s (- 1608m 14s) (2 10%) 1.2473




BLEU Score is 4.824717467396095
180m 17s (- 1622m 38s) (2 10%) 1.2386
181m 36s (- 1634m 29s) (2 10%) 1.2448
182m 55s (- 1646m 20s) (2 10%) 1.2421
184m 14s (- 1658m 11s) (2 10%) 1.2396
185m 33s (- 1670m 2s) (2 10%) 1.2424




BLEU Score is 5.624808906207593
187m 9s (- 1684m 24s) (2 10%) 1.2271
188m 28s (- 1696m 15s) (2 10%) 1.2538
189m 47s (- 1708m 6s) (2 10%) 1.2032
191m 6s (- 1719m 57s) (2 10%) 1.2407
192m 25s (- 1731m 48s) (2 10%) 1.2492




BLEU Score is 5.224086226534934
194m 1s (- 1746m 11s) (2 10%) 1.2397
195m 20s (- 1758m 1s) (2 10%) 1.2235
196m 39s (- 1769m 52s) (2 10%) 1.2472
197m 58s (- 1781m 43s) (2 10%) 1.2261
199m 17s (- 1793m 35s) (2 10%) 1.2188




BLEU Score is 4.849116565224457
200m 53s (- 1807m 59s) (2 10%) 1.2271
202m 12s (- 1819m 50s) (2 10%) 1.2491
203m 31s (- 1831m 40s) (2 10%) 1.2544
204m 50s (- 1843m 31s) (2 10%) 1.2163
206m 9s (- 1855m 22s) (2 10%) 1.2376




BLEU Score is 6.088085195945491
207m 45s (- 1869m 48s) (2 10%) 1.2283
209m 4s (- 1881m 39s) (2 10%) 1.2010
210m 23s (- 1893m 30s) (2 10%) 1.2234
211m 42s (- 1905m 21s) (2 10%) 1.2230
213m 1s (- 1917m 12s) (2 10%) 1.2335




BLEU Score is 5.442535932643114
214m 37s (- 1931m 36s) (2 10%) 1.2425
215m 56s (- 1943m 27s) (2 10%) 1.2168
217m 15s (- 1955m 18s) (2 10%) 1.2529
218m 34s (- 1967m 9s) (2 10%) 1.2510
219m 53s (- 1978m 59s) (2 10%) 1.2142




BLEU Score is 4.7265626721881215
221m 29s (- 1993m 25s) (2 10%) 1.2589
222m 48s (- 2005m 16s) (2 10%) 1.2446
224m 7s (- 2017m 6s) (2 10%) 1.2584
225m 26s (- 2028m 57s) (2 10%) 1.2304
226m 45s (- 2040m 48s) (2 10%) 1.2450




BLEU Score is 5.499292273300607
228m 21s (- 2055m 12s) (2 10%) 1.2210
229m 40s (- 2067m 2s) (2 10%) 1.2236
230m 59s (- 2078m 53s) (2 10%) 1.2307
232m 18s (- 2090m 44s) (2 10%) 1.2286
233m 37s (- 2102m 34s) (2 10%) 1.2350




BLEU Score is 4.897810428657348
235m 41s (- 1335m 33s) (3 15%) 1.1450
237m 0s (- 1343m 0s) (3 15%) 1.1437
238m 19s (- 1350m 28s) (3 15%) 1.1441
239m 38s (- 1357m 55s) (3 15%) 1.1403
240m 57s (- 1365m 23s) (3 15%) 1.1612




BLEU Score is 4.7644060114556925
242m 32s (- 1374m 26s) (3 15%) 1.1315
243m 51s (- 1381m 54s) (3 15%) 1.1740
245m 10s (- 1389m 21s) (3 15%) 1.1460
246m 29s (- 1396m 49s) (3 15%) 1.1474
247m 48s (- 1404m 16s) (3 15%) 1.1369




BLEU Score is 5.160198965232943
249m 24s (- 1413m 21s) (3 15%) 1.1422
250m 43s (- 1420m 49s) (3 15%) 1.1753
252m 2s (- 1428m 16s) (3 15%) 1.1474
253m 22s (- 1435m 44s) (3 15%) 1.1439
254m 41s (- 1443m 12s) (3 15%) 1.1220




BLEU Score is 5.390412713627199
256m 16s (- 1452m 16s) (3 15%) 1.1352
257m 35s (- 1459m 43s) (3 15%) 1.1517
258m 54s (- 1467m 11s) (3 15%) 1.1464
260m 13s (- 1474m 38s) (3 15%) 1.1632
261m 32s (- 1482m 6s) (3 15%) 1.1580




BLEU Score is 5.663706076197828
263m 8s (- 1491m 9s) (3 15%) 1.1425
264m 27s (- 1498m 37s) (3 15%) 1.1401
265m 46s (- 1506m 5s) (3 15%) 1.1387
267m 5s (- 1513m 33s) (3 15%) 1.1429
268m 24s (- 1521m 0s) (3 15%) 1.1547




BLEU Score is 5.736229567262459
270m 0s (- 1530m 4s) (3 15%) 1.1687
271m 19s (- 1537m 31s) (3 15%) 1.1670
272m 38s (- 1544m 59s) (3 15%) 1.1614
273m 57s (- 1552m 27s) (3 15%) 1.1509
275m 16s (- 1559m 54s) (3 15%) 1.1293




BLEU Score is 5.425942714479506
276m 53s (- 1569m 3s) (3 15%) 1.1898
278m 12s (- 1576m 31s) (3 15%) 1.1662
279m 31s (- 1583m 59s) (3 15%) 1.1570
280m 50s (- 1591m 26s) (3 15%) 1.1598
282m 9s (- 1598m 54s) (3 15%) 1.1701




BLEU Score is 5.990124475270204
283m 45s (- 1607m 57s) (3 15%) 1.1244
285m 4s (- 1615m 24s) (3 15%) 1.1446
286m 23s (- 1622m 52s) (3 15%) 1.1358
287m 42s (- 1630m 19s) (3 15%) 1.1809
289m 1s (- 1637m 47s) (3 15%) 1.1151




BLEU Score is 5.577015486470787
290m 37s (- 1646m 49s) (3 15%) 1.1399
291m 55s (- 1654m 17s) (3 15%) 1.1496
293m 14s (- 1661m 44s) (3 15%) 1.1495
294m 33s (- 1669m 12s) (3 15%) 1.1863
295m 52s (- 1676m 40s) (3 15%) 1.1707




BLEU Score is 5.76136704953008
297m 28s (- 1685m 43s) (3 15%) 1.1572
298m 47s (- 1693m 10s) (3 15%) 1.1073
300m 6s (- 1700m 37s) (3 15%) 1.1880
301m 25s (- 1708m 5s) (3 15%) 1.1810
302m 44s (- 1715m 32s) (3 15%) 1.1471




BLEU Score is 6.558736760443415
304m 20s (- 1724m 35s) (3 15%) 1.1630
305m 39s (- 1732m 3s) (3 15%) 1.1733
306m 58s (- 1739m 30s) (3 15%) 1.1516
308m 17s (- 1746m 58s) (3 15%) 1.1557
309m 36s (- 1754m 26s) (3 15%) 1.1477




BLEU Score is 5.532584339993643
311m 12s (- 1763m 29s) (3 15%) 1.1761
312m 31s (- 1770m 57s) (3 15%) 1.1725
313m 50s (- 1778m 24s) (3 15%) 1.1579
315m 9s (- 1785m 52s) (3 15%) 1.1612
316m 28s (- 1793m 19s) (3 15%) 1.1569




BLEU Score is 5.710771910042182
318m 4s (- 1802m 22s) (3 15%) 1.1466
319m 23s (- 1809m 50s) (3 15%) 1.1495
320m 42s (- 1817m 18s) (3 15%) 1.1417
322m 1s (- 1824m 46s) (3 15%) 1.1265
323m 20s (- 1832m 13s) (3 15%) 1.1547




BLEU Score is 5.362531296047899
324m 56s (- 1841m 17s) (3 15%) 1.1639
326m 15s (- 1848m 45s) (3 15%) 1.1484
327m 34s (- 1856m 12s) (3 15%) 1.1481
328m 53s (- 1863m 40s) (3 15%) 1.1527
330m 11s (- 1871m 7s) (3 15%) 1.1478




BLEU Score is 5.571841368086334
331m 47s (- 1880m 11s) (3 15%) 1.1252
333m 6s (- 1887m 38s) (3 15%) 1.1357
334m 25s (- 1895m 6s) (3 15%) 1.1574
335m 44s (- 1902m 34s) (3 15%) 1.1842
337m 3s (- 1910m 1s) (3 15%) 1.1470




BLEU Score is 5.512970523407826
338m 39s (- 1919m 4s) (3 15%) 1.1866
339m 58s (- 1926m 32s) (3 15%) 1.1729
341m 17s (- 1933m 59s) (3 15%) 1.1891
342m 36s (- 1941m 27s) (3 15%) 1.1225
343m 55s (- 1948m 54s) (3 15%) 1.1529




BLEU Score is 6.395609752968853
345m 31s (- 1958m 0s) (3 15%) 1.1564


In [None]:
hidden_size = 300
encoder1 = EncoderRNN(EMBEDDING_SIZE,hidden_size).to(device)
decoder1 = AttnDecoderRNN(EMBEDDING_SIZE,hidden_size, len(ordered_words_ft_en)).to(device)
epoch_size = 20
folder = './attention_model/GRU_LR001_decay_H300_ES20'
##UNCOMMENT TO TRAIN THE MODEL
trainIters(encoder1, decoder1, epoch_size, folder ,lr_decrease = True,print_every=50,plot_every = 100, evaluate_every = 250,learning_rate=0.001)

In [29]:
score_list, output_words, attentions = evaluate(val_loader, encoder1, decoder1,after_train_mode =True)

NameError: name 'encoder1' is not defined