# Pointer Network Training
In this notebook we are going to train the pointer network with a 10% sample of the CNN dataset we processed in the previous notebook.

In [None]:
import tensorflow as tf
import numpy as np
from src import PointerNetwork
from src import load_pretrained_embeddings
from src import train_model, evaluate_model

from pickle import load

In [None]:
# TensorFlow 1.x needs eager mode switched on explicitly. TensorFlow 2.x starts
# in eager mode and no longer exposes `tf.enable_eager_execution` at the top
# level, so the call is made only when eager execution is not active yet. This
# also makes the cell safe to re-run.
if not tf.executing_eagerly():
    tf.enable_eager_execution()

## Data Loading
We load all the data here. `X` holds the processed stories, `y` the labels, `gen` flags whether a label is to be generated or taken from the input, and `y_raw` is the same as `y` but without indexes pointing to `X`.

In [None]:
# Load the arrays written by the preprocessing notebook, one group per split.
# The `gen_*` arrays are cast to float32 right after loading.
X_train = np.load('processed/X_train.npy')
y_train = np.load('processed/y_train.npy')
gen_train = np.load('processed/gen_train.npy').astype('float32')
y_raw_train = np.load('processed/y_raw_train.npy')

X_val = np.load('processed/X_val.npy')
y_val = np.load('processed/y_val.npy')
gen_val = np.load('processed/gen_val.npy').astype('float32')
y_raw_val = np.load('processed/y_raw_val.npy')

X_test = np.load('processed/X_test.npy')
y_test = np.load('processed/y_test.npy')
gen_test = np.load('processed/gen_test.npy').astype('float32')
y_raw_test = np.load('processed/y_raw_test.npy')

# Vocabulary lookups. `w2id` maps a token string to its index; `id2w` is
# presumably the inverse mapping (TODO confirm against the preprocessing code).
# The files are opened in `with` blocks so the handles are closed promptly
# instead of being left to the garbage collector.
# NOTE(review): unpickling can execute arbitrary code, so only load files that
# were produced by the trusted preprocessing step.
with open('processed/w2id.pkl', 'rb') as w2id_file:
    w2id = load(w2id_file)
with open('processed/id2w.pkl', 'rb') as id2w_file:
    id2w = load(id2w_file)

## Shuffling and Batching

In [None]:
# Training pipeline: shuffle individual examples first, then batch.
# Batching before shuffling only reorders whole batches, so the same 32
# examples would always travel together. The shuffle buffer spans the whole
# training set so the examples are mixed uniformly; the data is already held
# in memory as NumPy arrays, so the buffer adds little overhead.
train_generator = tf.data.Dataset.from_tensor_slices((X_train, y_train, gen_train))
train_generator = train_generator.shuffle(X_train.shape[0])
train_generator = train_generator.batch(32)

# Validation pipeline: stories paired with their labels. Left as it was
# (batched, then shuffled at batch level).
val_generator = tf.data.Dataset.from_tensor_slices((X_val, y_val))
val_generator = val_generator.batch(32)
val_generator = val_generator.shuffle(1000)

# Test pipeline: stories paired with the raw labels (`y_raw_test`), i.e. the
# labels without indexes pointing into the input. Left as it was.
test_generator = tf.data.Dataset.from_tensor_slices((X_test, y_raw_test))
test_generator = test_generator.batch(32)
test_generator = test_generator.shuffle(1000)

## Model Creation
We instantiate the network with this dataset's parameters.

In [None]:
# Training-loop settings; both are handed to `train_model` further down.
# NOTE: the dataset pipelines are batched with a literal 32 — keep BATCH_SIZE
# in sync with that value.
BATCH_SIZE = 32
EPOCHS = 1

# Sizes of the network's sub-modules.
enc_units = 128
dec_units = 128
att_units = 128
switch_units = 128

# Dataset-derived parameters: vocabulary size and the second dimension of the
# training input array.
voc_size = len(w2id)
max_len = X_train.shape[1]

# Vocabulary indexes of the special tokens.
start_index_token = w2id['<start>']
end_index_token = w2id['<end>']
padding_char = w2id['<pad>']

# Arguments are passed positionally, in the order PointerNetwork expects them.
ptr = PointerNetwork(
    enc_units, dec_units, voc_size, att_units, switch_units,
    max_len, start_index_token, end_index_token, padding_char,
)

# NOTE(review): the embeddings loader is fed an all-zero
# (voc_size x voc_size) matrix — confirm this is the intended input, since
# the matrix grows quadratically with the vocabulary size.
zero_embedding_matrix = np.zeros((voc_size, voc_size))
ptr.set_embeddings_layer(load_pretrained_embeddings(zero_embedding_matrix))

## Model Training

In [None]:
# Name of the metric reported while training.
metric_names = ['loss']

# Validation metrics: ROUGE-1, ROUGE-2 and ROUGE-L, each in three variants
# (presumably precision / recall / F-score — TODO confirm against `src`).
val_metric_names = [
    'rouge-1-p', 'rouge-1-r', 'rouge-1-f',
    'rouge-2-p', 'rouge-2-r', 'rouge-2-f',
    'rouge-l-p', 'rouge-l-r', 'rouge-l-f',
]

# Validation metric handed to `train_model` as the "best" metric
# (presumably used to pick the best weights — verify in `train_model`).
val_best_metric = 'rouge-2-f'

# Number of training examples (rows of X_train).
training_size = len(X_train)

In [None]:
# Run training. Positional arguments keep their original order; the number of
# training examples comes from `training_size`, which the previous cell
# computes from X_train.
train_model(
    ptr,
    train_generator,
    val_generator,
    training_size,
    EPOCHS,
    BATCH_SIZE,
    metric_names,
    val_metric_names,
    val_best_metric,
    smooth_window=1,
    weights_dir='./weights',
    log_dir='./logs',
)

## Model Evaluation

In [None]:
# Score the trained network on the test pipeline (stories paired with the raw
# labels) using the same ROUGE metric names that were used for validation.
evaluate_model(ptr, test_generator, val_metric_names)