# Pointer Network Training
In this notebook we are going to train the pointer network with a 10% sample of the CNN dataset we processed in the previous notebook.

In [17]:
import time
from pickle import load

import numpy as np
import tensorflow as tf

from embedding_helper import load_pretrained_embeddings
from pointer_network import PointerNetwork

In [3]:
# Turn on TensorFlow eager execution for the rest of the notebook.
# NOTE(review): `tf.enable_eager_execution` is the TF 1.x spelling -- confirm
# the TensorFlow version this notebook is meant to run on.
tf.enable_eager_execution()

## Data Loading
We load all the data here: `X` holds the processed stories, `y` the labels, `gen` indicates whether each label is to be generated or taken from the input, and `y_raw` holds the same labels as `y` but without indexes pointing into `X`.

In [7]:
# Load the pre-processed arrays for each split from the `processed/` folder.
X_train = np.load('processed/X_train.npy')
y_train = np.load('processed/y_train.npy')
gen_train = np.load('processed/gen_train.npy')
y_raw_train = np.load('processed/y_raw_train.npy')

X_val = np.load('processed/X_val.npy')
y_val = np.load('processed/y_val.npy')
gen_val = np.load('processed/gen_val.npy')
y_raw_val = np.load('processed/y_raw_val.npy')

X_test = np.load('processed/X_test.npy')
y_test = np.load('processed/y_test.npy')
gen_test = np.load('processed/gen_test.npy')
y_raw_test = np.load('processed/y_raw_test.npy')

# Load the pickled vocabulary mappings. The `with` blocks make sure the file
# handles are closed as soon as the objects are read.
# NOTE: unpickling executes arbitrary code -- only load files you produced
# yourself or otherwise trust.
with open('processed/w2id.pkl', 'rb') as w2id_file:
    w2id = load(w2id_file)
with open('processed/id2w.pkl', 'rb') as id2w_file:
    id2w = load(id2w_file)

## Shuffling and Batching

In [None]:
# Number of examples per batch, shared by all three splits.
BATCH_SIZE = 32

# Training data: shuffle individual examples *before* batching. Shuffling
# after `batch()` only reorders whole batches, so every batch would contain
# the same examples in every epoch. A buffer as large as the dataset gives a
# full shuffle.
train_generator = tf.data.Dataset.from_tensor_slices((X_train, y_train, gen_train))
train_generator = train_generator.shuffle(len(X_train)).batch(BATCH_SIZE)

# Validation and test data are only batched: the order of the examples is
# not needed for evaluation, and a fixed order keeps runs comparable.
val_generator = tf.data.Dataset.from_tensor_slices((X_val, y_val, gen_val))
val_generator = val_generator.batch(BATCH_SIZE)

test_generator = tf.data.Dataset.from_tensor_slices((X_test, y_test, gen_test))
test_generator = test_generator.batch(BATCH_SIZE)

## Model Creation
We instantiate the network with this dataset's parameters.

In [18]:
# Layer sizes handed to the PointerNetwork constructor (all set to 128).
enc_units = dec_units = att_units = switch_units = 128

# Values derived from the loaded data: vocabulary size and the second
# dimension of the training input array.
voc_size = len(w2id)
max_len = X_train.shape[1]

# Vocabulary ids of the special tokens.
start_index_token = w2id['<start>']
end_index_token = w2id['<end>']
padding_char = w2id['<pad>']

# Arguments are positional, so their order must match the constructor.
ptr = PointerNetwork(
    enc_units,
    dec_units,
    voc_size,
    att_units,
    switch_units,
    max_len,
    start_index_token,
    end_index_token,
    padding_char,
)

# NOTE(review): an all-zero 300x300 matrix is passed to
# `load_pretrained_embeddings` here -- confirm this placeholder is intended.
ptr.set_embeddings_layer(
    load_pretrained_embeddings(np.zeros((300, 300)))
)

## Model Training

In [None]:
def init_metrics():
    """Return a new dict mapping each training metric name to an empty list.

    The key order (loss, accuracy, top-5) is significant: the training loop
    pairs the dict's entries positionally with the values returned by a
    training step.
    """
    metric_names = ('training_loss', 'training_acc', 'training_top5')
    return {metric_name: [] for metric_name in metric_names}

# Names given to the validation metrics, in the order they are reported.
val_names = ['val_loss', 'val_acc', 'val_top_k']

In [None]:
# State shared by the training loop below.
current_score = 0   # validation accuracy of the epoch just evaluated
best_score = 0      # best validation accuracy seen so far
# `time.clock()` was removed in Python 3.8; `time.perf_counter()` is its
# replacement for measuring elapsed time.
start = time.perf_counter()
end = 0
j = 0               # global step used when logging smoothed training metrics
smooth_window = 25  # number of most recent batches averaged for logging
mean = 0            # running value threaded through `progress_eta`

# Keys under which the values returned by validation are stored, by position.
val_names = ['val_loss', 'val_acc', 'val_top_k']

def init_metrics():
    """Build the per-epoch container for training metrics.

    Returns:
        A dict with one independent empty list per training metric, keyed in
        the order loss, accuracy, top-5.
    """
    metrics = {}
    for metric_name in ('training_loss', 'training_acc', 'training_top5'):
        metrics[metric_name] = []
    return metrics

# Iterations per epoch: half the number of entries in `train.files`.
# NOTE(review): `train` is not defined in any cell visible here -- confirm
# where it comes from.
num_iterations = len(train.files) // 2
print('Start training...')
print("Number of iterations per epoch is: {}".format(num_iterations))
print()

# Train for `epochs` epochs. Each epoch logs smoothed training metrics per
# batch, evaluates on the validation data, logs the averaged validation
# metrics, and saves the weights whenever validation accuracy improves.
#
# NOTE(review): `epochs`, `model`, `tensorboard`, `progress_eta`,
# `validation_generator` and `base_filename` are not defined in any cell
# visible in this notebook. The model built above is named `ptr`, the
# validation dataset is named `val_generator`, and the datasets built above
# yield (X, y, gen) triples rather than (X, y) pairs -- confirm where these
# names come from before running this cell.
for epoch in range(epochs):
    # Fresh buffers of recent training metrics for this epoch
    metrics = init_metrics()

    for iteration, (X, y) in enumerate(train_generator):
        prev_time = time.time()
        # Do a train step on a single batch
        logs = model.train_on_batch(X, y)
        c_time = time.time()

        # Newest value goes first. This pairs `logs` with the metric lists
        # positionally, so it relies on the key order of `init_metrics()`.
        for metric_val, lst in zip(logs, metrics.values()):
            lst.insert(0, metric_val)

        if iteration > smooth_window:
            metrics_string = 'Epoch: {}'.format(epoch)

            for m_name, m_lst in metrics.items():
                # Trim *before* averaging so the mean covers exactly the
                # last `smooth_window` batches.
                del m_lst[smooth_window:]
                smoothed = np.mean(m_lst)
                tensorboard.on_epoch_end(j, {m_name: smoothed})
                metrics_string += '\t{0}: {1:.2}'.format(m_name, smoothed)

            # print progress
            # `prev_time` is reset at the top of every iteration, so the
            # interval passed here covers the training step only.
            mean = progress_eta(iteration + 1 - smooth_window,
                                num_iterations - smooth_window,
                                prev_time,
                                c_time,
                                mean,
                                metrics_string)
            j += 1

    total_metrics = {}

    # Compute validation in batches, collecting each metric by name
    for X, y in validation_generator:
        metrics_ = model.evaluate(X, y, verbose=0)

        for i, metric in enumerate(metrics_):
            total_metrics.setdefault(val_names[i], []).append(metric)

    # Average results over all validation batches
    mean_metrics = {key: np.mean(values)
                    for key, values in total_metrics.items()}

    # Log on tensorboard
    tensorboard.on_epoch_end(epoch, mean_metrics)

    # Check best score and swap if better
    current_score = mean_metrics['val_acc']

    print()

    if current_score > best_score:
        model.save_weights(base_filename + 'weights.' + str(epoch) + '-' + str(current_score) + '.hdf5')
        best_score = current_score
        print("Saved. ")

    print("Validation Accuracy in is {0:.6f} at epoch {1}"
          .format(np.mean(mean_metrics['val_acc']), epoch))
    print("Validation Top K Accuracy is {0:.6f} at epoch {1}"
          .format(np.mean(mean_metrics['val_top_k']), epoch))


tensorboard.on_train_end(None)