# Pointer Network Training
In this notebook we are going to train the pointer network with a 10% sample of the CNN dataset we processed in the previous notebook.

In [3]:
import tensorflow as tf
import numpy as np
from src import PointerNetwork
from src import load_pretrained_embeddings

from pickle import load

In [4]:
# Enable eager execution (TF 1.x API) so that the tf.data datasets built below
# can be iterated directly with plain Python loops by the training code.
tf.enable_eager_execution()

## Data Loading
We load all the data here. `X` holds the processed stories and `y` the labels; `gen` indicates whether each label has to be generated or copied from the input; and `y_raw` contains the same labels as `y`, but without indexes pointing to `X`.

In [5]:
# Load the pre-processed arrays for every split. For each split:
#   X_*     - processed stories
#   y_*     - labels
#   gen_*   - flag telling whether a label is generated or taken from the
#             input (cast to float32 right after loading)
#   y_raw_* - same labels as y_*, without indexes pointing to X_*
X_train = np.load('processed/X_train.npy')
y_train = np.load('processed/y_train.npy')
gen_train = np.load('processed/gen_train.npy').astype('float32')
y_raw_train = np.load('processed/y_raw_train.npy')

X_val = np.load('processed/X_val.npy')
y_val = np.load('processed/y_val.npy')
gen_val = np.load('processed/gen_val.npy').astype('float32')
y_raw_val = np.load('processed/y_raw_val.npy')

X_test = np.load('processed/X_test.npy')
y_test = np.load('processed/y_test.npy')
gen_test = np.load('processed/gen_test.npy').astype('float32')
y_raw_test = np.load('processed/y_raw_test.npy')

# Vocabulary lookups (word -> id and id -> word). The files are opened with a
# context manager so the handles are closed as soon as unpickling finishes.
# NOTE(review): unpickling can execute arbitrary code - only load files that
# were produced by the preprocessing notebook.
with open('processed/w2id.pkl', 'rb') as w2id_file:
    w2id = load(w2id_file)
with open('processed/id2w.pkl', 'rb') as id2w_file:
    id2w = load(id2w_file)

## Shuffling and Batching

In [6]:
# Build one tf.data pipeline per split.
#
# Shuffling happens *before* batching so that individual examples are mixed;
# shuffling an already-batched dataset would only permute whole batches and
# leave the composition of every batch fixed.
# The batch size (32) must stay in sync with BATCH_SIZE in the model cell.
train_generator = tf.data.Dataset.from_tensor_slices((X_train, y_train, gen_train))
train_generator = train_generator.shuffle(1000)
train_generator = train_generator.batch(32)

val_generator = tf.data.Dataset.from_tensor_slices((X_val, y_val))
val_generator = val_generator.shuffle(1000)
val_generator = val_generator.batch(32)

# The test stories are paired with the *test* raw labels (the previous version
# paired them with y_raw_train, i.e. labels from a different split).
test_generator = tf.data.Dataset.from_tensor_slices((X_test, y_raw_test))
test_generator = test_generator.shuffle(1000)
test_generator = test_generator.batch(32)

## Model Creation
We instantiate the network with this dataset's parameters.

In [7]:
# --- Training hyper-parameters ---
BATCH_SIZE = 32
EPOCHS = 1

# --- Layer sizes ---
enc_units = 128
dec_units = 128
att_units = 128
switch_units = 128

# --- Values derived from the loaded data ---
voc_size = len(w2id)
max_len = X_train.shape[1]
start_index_token = w2id['<start>']
end_index_token = w2id['<end>']
padding_char = w2id['<pad>']

# Positional argument order is the one expected by PointerNetwork.
ptr = PointerNetwork(
    enc_units,
    dec_units,
    voc_size,
    att_units,
    switch_units,
    max_len,
    start_index_token,
    end_index_token,
    padding_char,
)

# NOTE(review): an all-zero (voc_size, voc_size) array is handed to
# load_pretrained_embeddings; what the helper does with it is not visible
# here - confirm this is the intended input.
embedding_input = np.zeros((voc_size, voc_size))
ptr.set_embeddings_layer(load_pretrained_embeddings(embedding_input))

## Model Training

In [8]:
# Number of training examples (first axis of X_train).
training_size = len(X_train)
# Names of the metrics tracked by the training loop; they key the per-metric
# history lists and the validation summaries.
metric_names = ['loss']

In [9]:
import datetime
import time
import numpy as np
import math

def progress_eta(count, total, prev_time, c_time, prev_mean, status=''):
    """Print a one-line progress bar with an ETA and return the updated mean batch time.

    Args:
        count: number of steps completed so far (must be non-zero).
        total: total number of steps.
        prev_time: timestamp taken before the current step.
        c_time: timestamp taken after the current step.
        prev_mean: running mean of the step duration before this step.
        status: free text shown between the percentage and the ETA.

    Returns:
        The running mean of the step duration after folding in this step.
    """
    bar_len = 60
    total_f = float(total)

    # Progress bar: '=' for the completed share, '-' for the remainder.
    done = int(round(bar_len * count / total_f))
    bar = ''.join(['=' * done, '-' * (bar_len - done)])
    percents = min(100, round(100.0 * count / total_f, 1))

    # Incremental mean: the new sample gets weight 1/count.
    elapsed = c_time - prev_time
    mean = (1 - 1 / count) * prev_mean + (1 / count) * elapsed

    # Remaining steps times the mean step duration, as H:MM:SS.
    remaining_seconds = int((total - count) * mean)
    eta_str = str(datetime.timedelta(seconds=remaining_seconds))

    line = '[{0}] {1}{2} \t{3}\tETA: {4}'.format(bar, percents, '%', status, eta_str)
    print(line, end='')
    # Carriage return so that the next call overwrites this line.
    print('\r', end='')
    return mean

def log_scalar(name, value):
    """Record `value` as a scalar summary named `name`.

    The write happens inside an `always_record_summaries` context, so the
    summary is emitted on every call.
    """
    summary = tf.contrib.summary
    with summary.always_record_summaries():
        summary.scalar(name, value)

def setup_tensoroard(log_dir):
    """Create the default summary writer for `log_dir` and return the global step.

    Args:
        log_dir: directory the summary file writer writes to.

    Returns:
        The global step obtained from `tf.train.get_or_create_global_step()`.
    """
    # Flush pending summaries every 10 seconds.
    writer = tf.contrib.summary.create_file_writer(log_dir, flush_millis=10000)
    writer.set_as_default()
    return tf.train.get_or_create_global_step()

def train_model(model, train_generator, val_generator, training_size, epochs, batch_size,
                metric_names, best_model_metric, smooth_window=25, weights_dir='./', log_dir='./log'):
    """Run the full training loop: one training pass and one validation pass per epoch.

    Args:
        model: object passed through to `train_epoch` and `val_epoch`.
        train_generator: iterable of training batches.
        val_generator: iterable of validation batches.
        training_size: number of training examples; used with `batch_size`
            to compute the number of iterations per epoch.
        epochs: number of epochs to run.
        batch_size: number of examples per batch.
        metric_names: names of the tracked metrics.
        best_model_metric: metric used to select the best model; must be one
            of `metric_names`.
        smooth_window: window size stored in the shared state under 'window'.
        weights_dir: location forwarded to `val_epoch` for saving weights.
        log_dir: directory for the summary writer.

    Raises:
        AssertionError: if `best_model_metric` is not in `metric_names`.
    """
    assert best_model_metric in metric_names

    # Mutable state shared with train_epoch / val_epoch / log_batch.
    data = {
        'current_score': 0,
        'best_score': 0,
        # time.clock() was removed in Python 3.8; time.time() matches the
        # clock used for the per-batch timestamps in train_epoch.
        'start': time.time(),
        'end': 0,
        'mean': 0,
        'prev_time': 0,
        'num_iterations': math.ceil(training_size / batch_size),
        'window': smooth_window
    }

    print('Start training...')
    print("Number of iterations per epoch is: " + str(data['num_iterations']))
    print()

    # Tensorboard setup
    global_step = setup_tensoroard(log_dir)
    data['global_step'] = global_step

    # Training Loop
    for epoch in range(epochs):
        # Fresh, empty history list for every metric at the start of each epoch
        metrics = {name: [] for name in metric_names}

        # Start Training Epoch
        train_epoch(model, train_generator, epoch, metrics, data)

        # Validate Last Epoch
        val_epoch(model, val_generator, epoch, metric_names,
                  best_model_metric, weights_dir, data)


In [10]:
def log_batch(metrics, i, epoch, data):
    """Log smoothed training metrics for batch `i` and refresh the progress bar.

    Nothing is logged until more than `data['window']` batches have been seen.
    After that, every metric history is truncated to its `window` most recent
    values and the mean of that window is logged and displayed.

    Args:
        metrics: dict mapping metric name -> list of values, newest first
            (train_epoch inserts new values at index 0). Lists are truncated
            in place.
        i: zero-based index of the current batch within the epoch.
        epoch: current epoch number (only used for display).
        data: shared training state; reads 'window', 'global_step',
            'num_iterations', 'prev_time', 'c_time' and 'mean', and updates
            'mean' and 'prev_time'.
    """
    window = data['window']
    if i <= window:
        return

    metrics_string = 'Epoch: {}'.format(epoch)

    # Tensorboard add step
    data['global_step'].assign_add(1)

    for m_name, m_lst in metrics.items():
        # Keep only the `window` most recent values, then average exactly
        # that window (the old code used an undefined `window` name and
        # averaged the untruncated list).
        del m_lst[window:]
        smoothed = np.mean(m_lst)
        log_scalar(m_name, smoothed)
        metrics_string += '\t{0}: {1:.2}'.format(m_name, smoothed)

    # print progress (the warm-up batches are excluded from count and total)
    data['mean'] = progress_eta(i + 1 - window,
                                data['num_iterations'] - window,
                                data['prev_time'],
                                data['c_time'],
                                data['mean'],
                                metrics_string)
    data['prev_time'] = data['c_time']

def train_epoch(model, train_generator, epoch, metrics, data):
    """Train `model` for one pass over `train_generator`.

    For every batch the step is timed (timestamps stored in
    `data['prev_time']` / `data['c_time']`), the values returned by
    `model.train_on_batch` are pushed to the front of the matching history
    lists in `metrics`, and `log_batch` is called.

    Args:
        model: object exposing `train_on_batch(*batch)`.
        train_generator: iterable yielding the argument tuple of each batch.
        epoch: current epoch number, forwarded to `log_batch`.
        metrics: dict of metric name -> history list; values are paired with
            the returned logs by position.
        data: shared training state, updated in place.
    """
    for step, batch in enumerate(train_generator):
        # Timestamps right before and right after the optimisation step.
        data['prev_time'] = time.time()
        batch_logs = model.train_on_batch(*batch)
        data['c_time'] = time.time()

        # Newest value goes first in each history list.
        for value, history in zip(batch_logs, metrics.values()):
            history.insert(0, value)

        log_batch(metrics, step, epoch, data)
        

In [11]:
def val_epoch(model, val_generator, epoch, metrics, best_model_metric, weights_dir, data):
    """Evaluate `model` on the validation set, log the results and checkpoint the best model.

    Args:
        model: object exposing `evaluate(*batch, verbose=0)` (returning one
            value per entry of `metrics`, in the same order) and
            `save_weights(path)`.
        val_generator: iterable yielding the argument tuple of each batch.
        epoch: current epoch number (used in the checkpoint name and output).
        metrics: list of metric names, positionally matching what
            `model.evaluate` returns.
        best_model_metric: name of the metric used for model selection. A
            name ending in 'loss' is treated as lower-is-better; any other
            metric is treated as higher-is-better.
        weights_dir: directory in which checkpoints are written; it is
            created when missing.
        data: shared training state; 'current_score' and 'best_score' are
            updated in place ('best_score_set' is added once a
            lower-is-better score has been recorded).
    """
    import os  # local import so this cell does not depend on another cell's imports

    total_metrics = {'val_' + metric: [] for metric in metrics}
    mean_metrics = {}

    # Compute validation in batches
    for args in val_generator:
        metrics_ = model.evaluate(*args, verbose=0)

        for i, metric in enumerate(metrics_):
            # Same 'val_' prefix as the keys created above.
            total_metrics['val_' + metrics[i]].append(metric)

    # Average results & Log on Tensorboard
    for key, total_metric in total_metrics.items():
        mean_metrics[key] = np.mean(total_metric)
        log_scalar(key, mean_metrics[key])

    data['current_score'] = mean_metrics['val_' + best_model_metric]

    print()

    # Decide whether this epoch improved on the best score so far.
    if best_model_metric.endswith('loss'):
        # Lower is better. The initial best_score of 0 is only a placeholder,
        # so the first measured loss always counts as an improvement.
        improved = (not data.get('best_score_set', False)
                    or data['current_score'] < data['best_score'])
    else:
        improved = data['current_score'] > data['best_score']

    # Save the best model
    if improved:
        if weights_dir:
            os.makedirs(weights_dir, exist_ok=True)
        file_name = "weights.{0}-{1}.hdf5".format(str(epoch), str(data['current_score']))
        # os.path.join inserts the separator when weights_dir lacks a trailing slash.
        model.save_weights(os.path.join(weights_dir, file_name))
        data['best_score'] = data['current_score']
        data['best_score_set'] = True
        print("Saved. ")

    # Report every metric that was actually computed (the previous version
    # looked up hard-coded 'val_acc' / 'val_top_k' keys, which are missing
    # when only the loss is tracked).
    for key, value in mean_metrics.items():
        print("Validation {0} is {1:.6f} at epoch {2}"
              .format(key[len('val_'):], value, epoch))

In [12]:
# Launch training. The weights location ends with a path separator, matching
# the './' default of train_model, so checkpoint files are placed inside the
# ./weights/ directory rather than getting a "weights" file-name prefix.
train_model(ptr, train_generator, val_generator, X_train.shape[0],
            EPOCHS, BATCH_SIZE, metric_names, 'loss', weights_dir='./weights/',
            log_dir='./logs')

Start training...
Number of iterations per epoch is: 223



KeyboardInterrupt: 