In [13]:
import copy
import pickle
import pdb
import sys
sys.path.append("..")
import os

import numpy as np
import tensorflow as tf
from tqdm import tqdm
import sacrebleu

from utils import utils
from utils.metrics import BleuScore
from models.transformer import Transformer

In [14]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [15]:
# Locations of the parallel corpus and of the trained checkpoint.
# NOTE(review): these are absolute cluster paths; adjust when running elsewhere.
data_dir = '/project/cq-training-1/project2/teams/team12/data/'
best_model_path = '/project/cq-training-1/project2/submissions/team12/low-resource-translation/saved_model/Transformer-num_layers_2-d_model_128-num_heads_8-dff_512_fr_to_en_False_embedding_None_embedding_dim_128_back_translation_True_ratio_4.0'
path_en, path_fr = (
    os.path.join(data_dir, corpus_file)
    for corpus_file in ('train.lang1', 'train.lang2')
)


# Build the word -> index and index -> word lookups for each corpus file
# (vocab_size=None is forwarded unchanged to utils.create_vocab).
word2idx_en, idx2word_en = utils.create_vocab(path_en, vocab_size=None)
word2idx_fr, idx2word_fr = utils.create_vocab(path_fr, vocab_size=None)

In [16]:
# Keep only element [1] of what load_training_data returns; it is used as the
# validation set by the cells below.
valid_dataset = utils.load_training_data(
    path_en,
    path_fr,
    word2idx_en,
    word2idx_fr,
    seq_len=150,
    batch_size=64,
)[1]

In [5]:
# Additional methods 
def get_next(self,x,y):
    """Run ``self.forward`` with ``training=False`` and expose the last position.

    Written as a free function so it can be attached to ``Transformer`` as a method.

    Args:
        x: First argument forwarded unchanged to ``self.forward``.
        y: Second argument forwarded unchanged to ``self.forward``
            (the beam search passes the tokens decoded so far).

    Returns:
        ``(last_step, all_steps)`` where ``all_steps`` is the first of the two
        values returned by ``self.forward`` and ``last_step`` is
        ``all_steps[:, -1, :]``.
    """
    all_steps, _unused = self.forward(x, y, training=False)
    last_step = all_steps[:, -1, :]
    return last_step, all_steps

def update_state(self, y_true, y_pred, vocab, idx=False):
    """Accumulate sentence-level BLEU for one batch into the metric's running totals.

    Written as a free function so it can be attached to ``BleuScore`` as a method.

    Args:
        y_true: Indexable batch of label rows; each row must expose ``.numpy()``.
        y_pred: Indexable batch of predictions. With ``idx=True`` each row is
            handed directly to ``utils.generate_sentence``; otherwise each row
            must expose ``.numpy()`` and is decoded with
            ``utils.generate_sentence_from_probabilities``.
        vocab: Lookup forwarded to the ``utils`` sentence generators.
        idx: Selects which of the two decoding paths above is used for ``y_pred``.

    Side effects:
        Adds each sentence's BLEU ``score`` to ``self.total_score`` and
        increments ``self.total_num_examples`` once per row of ``y_true``.
    """
    for row in range(len(y_true)):
        reference = utils.generate_sentence(y_true[row].numpy().astype('int'), vocab)
        if idx:
            hypothesis = utils.generate_sentence(y_pred[row], vocab)
        else:
            hypothesis = utils.generate_sentence_from_probabilities(y_pred[row].numpy(), vocab)
        # NOTE(review): the reference is passed as a plain string; confirm this
        # matches the installed sacrebleu version's sentence_bleu signature.
        sentence_score = sacrebleu.sentence_bleu(hypothesis, reference, smooth_method='exp')
        self.total_score += sentence_score.score
        self.total_num_examples += 1

In [6]:
# Attach the helpers defined above as methods on the imported classes.
# `update_state` replaces whatever BleuScore.update_state was imported with,
# so every BleuScore instance in this notebook uses the version defined here.
Transformer.get_next = get_next
BleuScore.update_state = update_state

In [7]:
# Load model
# These hyper-parameters mirror the values encoded in the checkpoint directory
# name (num_layers_2, d_model_128, num_heads_8, dff_512).
model_config = {'num_layers': 2, 'd_model': 128, 'dff': 512, 'num_heads': 8}
# NOTE(review): the English side is passed as a vocabulary size while the French
# side is passed as the full word2idx dict — confirm against Transformer.__init__.
model = Transformer(model_config, len(word2idx_en), word2idx_fr)
# Being the last expression of the cell, the load status object is echoed as output.
model.load_weights(os.path.join(best_model_path, "model"))

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x2b8a35799c50>

In [8]:
def tf_beam_search(model,batch,steps,width,word2idx_fr):
    """Batched beam search that keeps the whole beam of every decoding step.

    Args:
        model: Object exposing ``start_token`` and ``get_next(x, y)``. The first
            value returned by ``get_next`` is softmaxed here, i.e. it is treated
            as unnormalised next-token scores over the vocabulary.
        batch: Mapping with an ``'inputs'`` entry; its first dimension is the
            batch size ``bs``.
        steps: Number of history slots. Slot 0 only holds the start token, so
            ``steps - 1`` tokens are decoded.
        width: Beam width. The vocabulary must have more than ``width`` entries
            for the ``<unk>`` replacement below to be valid.
        word2idx_fr: Mapping that must contain ``'<end>'`` and ``'<unk>'``.

    Returns:
        ``(hist, hist_probs)``:
        ``hist`` is a list of ``steps`` int32 arrays; ``hist[0]`` has shape
        ``(1, bs, 1)`` and ``hist[i]`` has shape ``(width, bs, i + 1)``.
        ``hist_probs`` is a float32 array of shape ``(steps, width, bs)`` with
        the accumulated log-probability of each stored hypothesis (row 0 stays
        zero).
    """
    x,bs = batch['inputs'], batch['inputs'].shape[0]
    # Slot 0 is the seed beam: one hypothesis per example, holding only the start token.
    hist = [np.ones((1,bs,1),dtype=np.int32)* model.start_token]
    for i in range(1,steps):
        # Placeholder for step i; hypotheses stored at step i hold i+1 tokens.
        hist.append(np.ones((width,bs,i+1),dtype=np.int32))
    hist_probs = np.zeros((steps,width,bs),dtype=np.float32)
    for i in range(1,steps): # loop over steps
        wid = hist[i-1].shape[0] # 1 for the seed beam, `width` afterwards
        # Each of the `wid` hypotheses proposes `width` continuations.
        cand, cand_probs = np.zeros((bs,wid*width,i+1),dtype=np.int32), np.zeros((bs,wid*width),dtype=np.float32)
        for j in range(wid): # loop over the hypotheses of the previous step

            # The output of the previous step is the decoder input of this step.
            curr, curr_probs = tf.convert_to_tensor(hist[i-1][j]), tf.convert_to_tensor(hist_probs[i-1,j])

            # Hypotheses that already ended get a large penalty so that their
            # continuations drop out of the beam. The penalty is applied to a
            # copy, so the score stored in hist_probs is not changed.
            temp_idx = np.flatnonzero(curr[:,-1] == word2idx_fr['<end>'])
            if temp_idx.size > 0: # eager tensors do not support item assignment
                temp_var = curr_probs.numpy()
                temp_var[temp_idx] += - 100
                curr_probs = tf.convert_to_tensor(temp_var)

            preds,_ = model.get_next(x,curr)
            preds = tf.nn.softmax(preds,-1)

            top_sort= tf.argsort(preds,axis=-1,direction='DESCENDING')
            topk = top_sort[:,:width] # take top 'width' predictions
            unk_idx = tf.where(topk == word2idx_fr['<unk>']).numpy()
            # Replace <unk> with the next best token. The top-k occupies ranks
            # 0..width-1, so the next best candidate sits at rank `width`
            # (the previous code read rank width+1 and skipped it).
            if unk_idx.size > 0:
                temp = topk.numpy() # eager tensors do not support item assignment
                temp[unk_idx[:,0],unk_idx[:,1]] = top_sort.numpy()[unk_idx[:,0],width]
                topk = tf.convert_to_tensor(temp)

            topk_probs = tf.gather(preds,topk,axis=-1,batch_dims=-1) # probabilities of the kept tokens
            curr = tf.broadcast_to(tf.expand_dims(curr,1),(bs,width,curr.shape[-1])) # bs, width, i
            topk = tf.expand_dims(topk,-1) # bs, width, 1
            cand[:,j*width:(j+1)*width] = tf.concat([curr,topk],-1) # hypothesis grows by one token
            cand_probs[:,j*width:(j+1)*width] = curr_probs[:,None] + np.log(topk_probs) # accumulate log probs

        cand, cand_probs = tf.convert_to_tensor(cand), tf.convert_to_tensor(cand_probs)
        # Out of the wid*width candidates keep the `width` best per example.
        indices = tf.argsort(cand_probs,axis=-1,direction='DESCENDING')[:,:width]
        value = tf.gather(cand,indices,axis=1,batch_dims=1)
        hist[i] = tf.transpose(value,perm=(1,0,2)).numpy() # stored as (width, bs, i+1)
        hist_probs[i] = tf.transpose(tf.gather(cand_probs,indices,axis=-1,batch_dims=1)).numpy() # stored as (width, bs)

    return hist, hist_probs

In [9]:
def get_beam(model,batch,word2idx_fr,steps=134,width=5):
    """Run ``tf_beam_search`` and drop its step-0 slot.

    Args:
        model, batch, word2idx_fr: Forwarded to ``tf_beam_search``.
        steps: Number of history slots requested from the search.
        width: Beam width.

    Returns:
        ``(options, probs)``: the search history and its scores with the first
        entry (index 0) of each removed, i.e. ``steps - 1`` entries.
    """
    history, history_scores = tf_beam_search(model, batch, steps, width, word2idx_fr)
    return history[1:], history_scores[1:]

In [10]:
def process_batch(options,probs,alpha,skip):
    """Pick the best hypothesis per example after length normalisation.

    Args:
        options: Sequence with one array per decoding step; ``options[s]`` is
            indexed as ``[beam, example]`` and yields a token sequence.
        probs: Array of shape ``(n_steps, width, batch)`` with the score of each
            hypothesis in ``options``.
        alpha: Exponent of the length penalty; step ``s`` (0-based) is divided
            by ``(s + 1) ** alpha``.
        skip: Number of leading steps excluded from the selection.

    Returns:
        A list with one token sequence per example: the hypothesis with the
        highest normalised score over all remaining steps and beams.
    """
    probs = np.asarray(probs)
    # Length normalisation. The divisor is derived from the number of steps
    # actually present instead of assuming a 134-step search.
    n_steps = probs.shape[0]
    probs = probs / (np.arange(1,n_steps+1)**alpha).reshape(-1,1,1)
    options,probs = options[skip:], probs[skip:]
    preds = []
    for i in range(probs.shape[2]):
        # Best (step, beam) pair for example i.
        idx = np.unravel_index(np.argmax(probs[:,:,i]),probs[:,:,i].shape)
        preds.append(options[idx[0]][idx[1],i])
    return preds

In [21]:
def check_end(ops,probs,word2idx_fr,bonus):
    """Reward hypotheses whose last token is ``<end>``.

    Args:
        ops: Sequence with one array per step; the last axis holds the tokens
            and the two leading axes index the hypotheses.
        probs: Scores indexed as ``probs[step][a, b]`` matching the leading
            axes of ``ops[step]``. Modified in place.
        word2idx_fr: Mapping that contains ``'<end>'``.
        bonus: Per-step reward; a finished hypothesis at 0-based position
            ``step`` receives ``bonus * step``.

    Returns:
        The same ``probs`` object, after the in-place update.
    """
    for step, step_ops in enumerate(ops):
        finished = np.where(step_ops[..., -1] == word2idx_fr['<end>'])
        if finished[0].size == 0:
            continue
        rows, cols = finished[0], finished[1]
        probs[step][rows, cols] = probs[step][rows, cols] + bonus*step
    return probs

In [12]:
# Do a beam search run
# The raw (bonus-free) beams are computed once and cached; the <end> bonus and
# the length penalty are applied afterwards so they can be swept cheaply.
# `get_beam` is used because `get_various_runs` is not defined in this notebook;
# with bonus=0 the raw beams are exactly what the later cells expect.
batch_ops, batch_probs =[], []
for batch in tqdm(valid_dataset,total = 26):
    ops,probs = get_beam(model,batch,word2idx_fr)
    batch_ops.append(ops)
    batch_probs.append(probs)

100%|██████████| 26/26 [32:35<00:00, 75.19s/it]


In [13]:
# copy_ops = copy.deepcopy(batch_ops)
# copy_probs = copy.deepcopy(batch_probs)

In [24]:
# Materialise every validation batch in a list so batches can be looked up by position.
valid_batches = list(tqdm(valid_dataset))

26it [00:00, 103.81it/s]


In [78]:
# Try different hyper-parameters
# bonus: per-step reward added by check_end to hypotheses that end with <end>.
bonus = [0,0.9,1,1.1]
# alpha: exponent of the length penalty applied by process_batch.
alpha = [0.6,0.7,0.8,0.9,1]

In [81]:
# Adjust probs for different bonus settings
# probs[i][j] holds the scores of batch j after applying bonus[i].
# check_end updates its input in place, so each setting works on a deep copy
# of the cached raw scores.
probs = []
for b in bonus:
    temp_probs = []
    for b_ops, raw_probs in zip(batch_ops, batch_probs):
        new_probs = copy.deepcopy(raw_probs)
        new_probs = check_end(b_ops,new_probs,word2idx_fr,b)
        temp_probs.append(new_probs)
    probs.append(temp_probs)

In [82]:
# Metric instance reused for every (bonus, alpha) combination; it is reset
# before each combination in the evaluation cell.
bleu_beam = BleuScore()

In [90]:
# Evaluate Bleu Score
# One BLEU value per (bonus, alpha) pair, computed over all cached validation batches.
for b, probs_for_bonus in zip(bonus, probs):
    for alp in alpha:
        bleu_beam.reset_states()
        for b_o, batch_scores, reference_batch in zip(batch_ops, probs_for_bonus, valid_batches):
            b_p = copy.deepcopy(batch_scores)
            preds = process_batch(b_o, b_p, alp, skip=5)
            bleu_beam.update_state(reference_batch['labels'], preds, idx2word_fr, idx = True)
        print(f"Bleu Score for bonus: {b} , alpha: {alp} = {bleu_beam.result()}")
    print("\n")

Bleu Score for bonus: 0 , Alpha: 0.6 = 5.240347498592297
Bleu Score for bonus: 0 , Alpha: 0.7 = 5.538980504401233
Bleu Score for bonus: 0 , Alpha: 0.8 = 5.912006534638225
Bleu Score for bonus: 0 , Alpha: 0.9 = 6.299427800743217
Bleu Score for bonus: 0 , Alpha: 1 = 6.799250492264565


Bleu Score for bonus: 0.9 , Alpha: 0.6 = 13.788507398297382
Bleu Score for bonus: 0.9 , Alpha: 0.7 = 13.76859594097073
Bleu Score for bonus: 0.9 , Alpha: 0.8 = 13.802057637823957
Bleu Score for bonus: 0.9 , Alpha: 0.9 = 13.760269066166185
Bleu Score for bonus: 0.9 , Alpha: 1 = 13.71499271622439


Bleu Score for bonus: 1 , Alpha: 0.6 = 13.772281234631146
Bleu Score for bonus: 1 , Alpha: 0.7 = 13.83306519896875
Bleu Score for bonus: 1 , Alpha: 0.8 = 13.78764660173033
Bleu Score for bonus: 1 , Alpha: 0.9 = 13.762686680913168
Bleu Score for bonus: 1 , Alpha: 1 = 13.712780026052592


Bleu Score for bonus: 1.1 , Alpha: 0.6 = 13.70374266193462
Bleu Score for bonus: 1.1 , Alpha: 0.7 = 13.784217844868536
Bleu Score

In [17]:
# Sanity Check
# Re-run the whole pipeline end to end with one (bonus, alpha) setting to
# confirm it reproduces the corresponding value of the sweep.
# Dedicated variable names are used so that the per-bonus `probs` list built for
# the sweep is not overwritten and the sweep cell can still be re-run afterwards.
score = BleuScore()
for batch in tqdm(valid_dataset,total=26):
    sanity_options, sanity_probs = get_beam(model,batch,word2idx_fr)
    sanity_probs = check_end(sanity_options,sanity_probs,word2idx_fr,bonus=1)
    sanity_preds = process_batch(sanity_options,sanity_probs,alpha=0.7,skip=5)
    score.update_state(batch['labels'], sanity_preds, idx2word_fr, idx = True)

print(f"BLEU Score: {score.result()}")

100%|██████████| 26/26 [34:24<00:00, 79.41s/it]

BLEU Score: 13.83306519896875





In [22]:
# Final function, with default parameters
def beam_search(model, batch, word2idx, bonus=1, alpha=0.7, skip=5):
    """Decode one batch with beam search and return the selected hypotheses.

    Chains ``get_beam`` (with its default steps and width), ``check_end`` and
    ``process_batch``.

    Args:
        model: Forwarded to ``get_beam``.
        batch: Forwarded to ``get_beam``.
        word2idx: Target vocabulary mapping, forwarded to ``get_beam`` and ``check_end``.
        bonus: Reward forwarded to ``check_end``.
        alpha: Length-penalty exponent forwarded to ``process_batch``.
        skip: Number of leading steps ignored by ``process_batch``.

    Returns:
        Whatever ``process_batch`` returns for the rescored beams.
    """
    beams, scores = get_beam(model, batch, word2idx)
    rescored = check_end(beams, scores, word2idx, bonus)
    return process_batch(beams, rescored, alpha, skip)

In [26]:
# See some examples
# Beam-search predictions for the first validation batch (default bonus/alpha/skip).
preds = beam_search(model,valid_batches[0],word2idx_fr)
# Direct model call on the same batch, shown later as the "Greedy Prediction".
# NOTE(review): how model(batch) decodes is not visible in this notebook — confirm.
output = model(valid_batches[0])

In [27]:
# Position, within the first validation batch, of the example to display.
idx=11
print("Source:")
print(utils.generate_sentence(valid_batches[0]['inputs'][idx].numpy(),idx2word_en))
print("\nTarget:")
print(utils.generate_sentence(valid_batches[0]['labels'][idx].numpy(),idx2word_fr))

Source:
as we know the reduction of regional disparities is one of the fundamental aims of the eu

Target:
Comme nous le savons , la disparition des disparités régionales constitue un des objectifs fondamentaux de l' ue .


In [28]:
# `output[idx]` is decoded with generate_sentence_from_probabilities, whereas the
# beam-search prediction is already a token-id sequence and uses generate_sentence.
print("Greedy Prediction:")
print(utils.generate_sentence_from_probabilities(output[idx],idx2word_fr))

print("\nBeam Search Prediction:")
print(utils.generate_sentence(preds[idx],idx2word_fr))

Greedy Prediction:
Comme nous le savons la réduction des émissions de conum , il est essentiel de réduire les émissions de conum .

Beam Search Prediction:
Comme nous le savons tous , la réduction des disparités régionales est un des objectifs fondamentaux de l' ue .
