# Saliency Calculation

In [2]:
from rinokeras.models.transformer import Transformer
import tensorflow.contrib.eager as tfe
import tensorflow.keras.backend as K
from collections import namedtuple
from collections import Counter
from datetime import datetime
from Dataset import Dataset
import tensorflow as tf
import numpy as np
import tqdm, json
import time
import os

%load_ext autoreload
%autoreload 2

# Expose only GPU 0 to this process.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Checkpoint file whose weights are loaded into the model further down.
model_file = "summarize_10L_dataset_0.15_copy7_ckpt.h5"

# Fixed lengths of the source and target sequences fed to the model.
input_length = 400
output_length = 110

# Transformer hyper-parameters passed to the Summarizer below.
n_layers = 10
dim = 512
d_filter = 2048

# Load the evaluation data; the vocabulary size is taken from the dataset.
dataset = Dataset("summaries_testing.0.15.msgpack")
dataset.load()
n_vocab = len(dataset.vocab)

# Non-eager

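This procedure is currently a bit hacky, but it produces the full saliency matrix of shape (110, 400): one row per output position and one column per input position. Because the GPU cannot hold the gradient graph for all 110 rows in memory at once, the rows are computed in batches as described below. Instructions for use: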

1. Run the following cell, which defines the `Summarizer` class.
2. Run the 2nd cell, starting with `it = 0`. The suggested schedule for `it` is [0, 20, 40, 60, 80]. On the last iteration (`it = 80`), 30 rows remain, so first change the class to use "for j in range(30)" and "self.layer_activations.append([activations] * 30)".
3. Once the 2nd cell has run, the graph has been built. Then run the 3rd cell with everything uncommented, as shown below. This generates the first 20 rows of the saliency map. On the final step, change its loop to "for j in range(30)" as well.
4. Repeat steps 2 and 3 with the following changes: update `it` to the next value in the schedule above (making the edits described in step 2 when `it = 80`), and in step 3 comment out everything marked "TO BE COMMENTED" before running; otherwise the input and the decoded summary from the first pass will be overwritten.

In [15]:
class Summarizer():
    """Graph-mode Transformer summarizer with ops for a slice of the saliency map.

    Building an instance adds to the default TensorFlow graph:

    * four placeholders for a single example (batch size 1): source/target
      token ids and their boolean masks;
    * the decoder logits and the per-position cross-entropy loss
      (``reduction=NONE``, so the loss keeps shape ``(1, output_length)``);
    * for ``n_rows`` consecutive target positions starting at ``it``, the
      gradient of that position's loss with respect to the ``my_output``
      tensor of the first encoder layer;
    * a beam-search decode op.

    Only a slice of target positions is handled per instance because the
    notebook builds the saliency map in batches to fit GPU memory.

    Args:
        input_length: Length of the source sequence placeholder.
        output_length: Length of the target sequence placeholder.
        embedding: Array of shape ``(n_vocab, dim)`` used as the shared
            source/target embedding initializer.
        n_layers: Number of Transformer layers.
        it: Index of the first target position whose gradient is built.
        d_filter: Passed through to ``Transformer`` as ``d_filter``.
        n_rows: Number of consecutive target positions (saliency rows) to
            build gradients for. Defaults to 20, the value that used to be
            hard-coded; pass e.g. ``n_rows=30`` for a larger final batch
            instead of editing the loop bound by hand.

    Raises:
        ValueError: If ``n_rows`` is not positive or the requested positions
            run past ``output_length``.
    """
    def __init__(self, input_length, output_length, embedding, n_layers, it, d_filter=1216, n_rows=20):
        n_vocab, dim = embedding.shape

        # Fail before any graph construction rather than deep inside TF slicing.
        if n_rows < 1:
            raise ValueError("n_rows must be at least 1, got {}".format(n_rows))
        if it + n_rows > output_length:
            raise ValueError(
                "Target positions [{}, {}) exceed output_length={}".format(it, it + n_rows, output_length))

        self.source_sequence = tf.placeholder(tf.int32,shape=(1,input_length), name="source_sequence")
        self.target_sequence = tf.placeholder(tf.int32, shape=(1,output_length),name="target_sequence")
        self.encoder_mask = tf.placeholder(tf.bool,shape=(1,input_length),name="encoder_mask")
        self.decoder_mask = tf.placeholder(tf.bool, shape=(1,output_length),name="decoder_mask")

        self.model = Transformer(discrete=True, n_symbols_in=n_vocab, d_model=dim, n_symbols_out=n_vocab, embedding_initializer=embedding, share_source_target_embedding=True, n_layers=n_layers, dropout=0.1, d_filter=d_filter)
        print("Built the Transfomer")

        # Teacher-forced pass: logits for the given target, then a loss per target position.
        self.decoded_logits = self.model(self.source_sequence, self.target_sequence, encoder_mask=self.encoder_mask, decoder_mask=self.decoder_mask)

        self.loss = tf.losses.sparse_softmax_cross_entropy(self.target_sequence, self.decoded_logits, tf.cast(self.decoder_mask, tf.float32), reduction = tf.losses.Reduction.NONE)
        print("Built train mode + loss")

        encoding_stack = self.model.encoder.encoding_stack.layers

        # Both lists hold one entry per inspected encoder layer; each entry is
        # itself a list with one tensor per target position in the batch.
        self.layer_activations = []
        self.layer_gradients = []
        for layer_idx in range(1):  # only the first encoder layer is inspected
            activations = encoding_stack[0][layer_idx].my_output
            # One gradient op per target position: d loss[it + j] / d activations.
            layer_grad = [
                tf.gradients(self.loss[0][it + j], activations)[0]
                for j in range(n_rows)
            ]
            # The same activation tensor is repeated so it pairs up
            # element-wise with the gradients when both lists are fetched.
            self.layer_activations.append([activations] * n_rows)
            self.layer_gradients.append(layer_grad)

        print("Built encoder activations and gradients")

        # Decode to output_length so the result fits the target placeholder.
        # NOTE(review): assumes beam_decode's second argument is the decode length
        # (it was the literal 110, equal to output_length in this notebook) - confirm.
        self.beam_decoded, _ = self.model.beam_decode(self.source_sequence, output_length, beam_size=8, encoder_mask=self.encoder_mask)
        print("Built beam-search decode.")

In [16]:
# Index of the first target position handled by this batch of saliency rows.
it = 0

# Random matrix of shape (vocabulary size, dim) used as the embedding initializer.
# NOTE(review): the values are presumably replaced when the checkpoint is loaded - confirm.
embedding = np.random.randn(len(dataset.vocab), dim)

model = Summarizer(
    input_length=input_length,
    output_length=output_length,
    embedding=embedding,
    n_layers=n_layers,
    it=it,
    d_filter=d_filter,
)

Built the Transfomer
Built train mode + loss
Built encoder activations and gradients
Built beam-search decode.


In [17]:
# Computes one batch of saliency rows for the current `it` and stores them in
# 'saliency.npy'. On the first batch (it == 0) the file is started afresh; on
# later batches the new rows are appended to what is already on disk.

#TO BE COMMENTED
inp, inp_mask, beam_decoded, first_beam, first_beam_mask = [], [], [], [], []

with tf.Session() as sess:
    model.model.load_weights(model_file)
    print("Reloaded previous")

    #TO BE COMMENTED - SECTION BEGINS
    # Draw one example and beam-decode its summary. Later batches must reuse
    # these values, which is why this section is skipped after the first pass.
    inp, inp_mask = dataset.build_batch(['input', 'input_mask'], size=1)

    inp = np.array(inp)
    a = [dataset.vocab[i] for i in inp[0]]
    print(a)
    beam_decoded = sess.run(model.beam_decoded, {model.source_sequence: inp, model.encoder_mask: inp_mask})
    print(beam_decoded)
    # Keep only the first beam and use it as the target sequence.
    first_beam = beam_decoded[:, 0, :]
    b = [dataset.vocab[i] for i in np.array(first_beam[0])]
    print(b)
    first_beam_mask = first_beam != dataset.vocab.index("<PAD>")
    print(first_beam)
    print(first_beam_mask)

    print("Beam decode summary:")
    print(dataset.evaluate_sentence(first_beam[0].tolist()))
    #TO BE COMMENTED - SECTION ENDS

    feed = {model.source_sequence: inp, model.target_sequence: first_beam, model.encoder_mask: inp_mask, model.decoder_mask: first_beam_mask}
    # Fetches are [loss, [activations...], [gradients...]]; the two inner lists
    # come from the single inspected encoder layer.
    all_vars = sess.run([model.loss] + model.layer_activations + model.layer_gradients, feed_dict=feed)

    loss = all_vars[0]
    activations = all_vars[1]
    gradients = all_vars[2]

    # One saliency row per fetched gradient: activation * gradient summed over
    # the last axis. The row count comes from the fetched lists, so this loop
    # needs no edit when the graph was built for a different batch size.
    norms = [np.sum(act * grad, axis=2) for act, grad in zip(activations, gradients)]
    # Drop the batch axis explicitly; unlike np.squeeze this stays 2-D even
    # when the batch holds a single row.
    norms = np.array(norms).reshape(len(norms), -1)
    print(norms.shape)
    # Append only when continuing a run. On the first batch a leftover file
    # from an earlier run must not be mixed into the new saliency map.
    if it != 0 and os.path.isfile('saliency.npy'):
        old_norms = np.load('saliency.npy')
        norms = np.concatenate((old_norms, norms))
    np.save('saliency.npy', norms)

Reloaded previous
(1, 110)
(20, 400)


In [23]:
# Read the accumulated saliency rows back from disk and show the array's shape.
A = np.load('saliency.npy')
np.shape(A)

(110, 400)