In [1]:
# Using the model trained on the code with for loop instead of foldl for visualization

import os
import json

import tensorflow as tf
import numpy as np

from tensor2tensor import problems
from tensor2tensor import models
from tensor2tensor.bin import t2t_decoder  # To register the hparams set
from tensor2tensor.utils import registry
from tensor2tensor.utils import trainer_lib
from tensor2tensor.utils import usr_dir
from tensor2tensor.data_generators import text_encoder

In [2]:
%%javascript
require.config({
  paths: {
      d3: '//cdnjs.cloudflare.com/ajax/libs/d3/3.4.8/d3.min'
  }
});

<IPython.core.display.Javascript object>

## HParams

In [3]:
#UDA_VISIBLE_DEVICES="0" t2t-trainer --t2t_usr_dir=~/EarlyStopReader/t2t_usr_dir --data_dir=~/t2t_data/algorithmic/data/ --output_dir=~/t2t_data/algorithmic/output_transformer --problem=algorithmic_sorted_string_matching --model=transformer_encoder --hparams_set=transformer_small --schedule=continuous_train_and_eval --keep_checkpoint_max=10 --train_steps=9000000 --eval_steps=20 --worker_gpu=1

problem_name = 'algorithmic_sorted_string_matching'
model_name = 'transformer_encoder'
hparams_set = 'transformer_small'
data_dir = '/Users/mostafa/Desktop/EarlyStopReader/algorithmic/data' 
t2t_usr_dir = '/Users/mostafa/Desktop/EarlyStopReader/t2t_usr_dir'

# PUT THE MODEL YOU WANT TO LOAD HERE!
CHECKPOINT = '/Users/mostafa/Desktop/EarlyStopReader/algorithmic/output_transformer' 


In [4]:
PAD = text_encoder.PAD
# tf.app.flags.DEFINE_string('f', '', 'kernel')
FLAGS = tf.flags.FLAGS
FLAGS.data_dir = data_dir
usr_dir.import_usr_dir(t2t_usr_dir)

INFO:tensorflow:Importing user module t2t_usr_dir from path /Users/mostafa/Desktop/EarlyStopReader


In [5]:
tf.reset_default_graph()

class AttentionVisualizer(object):
  """Helper object for creating Attention visualizations."""

  def __init__(
          self, hparams_set, model_name, data_dir, problem_name, beam_size=1):
    inputs, targets, samples, att_mats = build_model(
        hparams_set, model_name, data_dir, problem_name, beam_size=beam_size)
    # Fetch the problem
    problem = problems.problem(problem_name)
    encoders = problem.feature_encoders(data_dir)
    self.inputs = inputs
    self.targets = targets
    self.att_mats = att_mats
    self.samples = samples
    self.encoders = encoders

  def encode(self, input_string, input_symbol):
    """Input str to features dict, ready for inference."""
    inputs_tmp = [input_symbol] + input_string
    inputs = [i + text_encoder.NUM_RESERVED_TOKENS for i in inputs_tmp] + [text_encoder.EOS_ID]
    batch_inputs = np.reshape(inputs, [1, -1, 1, 1])  # Make it 3D.
    return batch_inputs
  
  def decode_input(self, integers):
    """List of ints to str."""
    integers = np.squeeze(integers).tolist()
    inputs_list = [str(i - text_encoder.NUM_RESERVED_TOKENS) if i!=0 else PAD for i in integers[:len(integers)-1]]
    return inputs_list
    #     return ' '.joint(inputs) + '.'

  def decode_output(self, integers):
    """int to str."""
    output = np.squeeze(integers).tolist()
    return str(output)
  
  def get_vis_data_from_string(self, sess, input_string, input_symbol):
    """Constructs the data needed for visualizing attentions.
    Args:
      sess: A tf.Session object.
      input_string: The input string.
      input_symbol: The input symbol to be searched in input_string.
    Returns:
      Tuple of (
          output: The label.
          enc_atts: enc_atts: Encoder self attention weights.
                A list of `num_layers` numpy arrays of size
                (batch_size, num_heads, inp_len, inp_len).
    """
    encoded_inputs = self.encode(input_string, input_symbol)
    
    # Run inference graph to get the label.
    out = sess.run(self.samples, {
        self.inputs: encoded_inputs,
    })

    # Run the output through the training graph to get the
    # attention tensors.
    att_mats = sess.run(self.att_mats, {
        self.inputs: encoded_inputs,
        self.targets: np.reshape(out, [1, -1, 1, 1]),
    })
    
    output = self.decode_output(out)
    input_list = self.decode_input(encoded_inputs)
    return output, input_list, att_mats


def build_model(hparams_set, model_name, data_dir, problem_name, beam_size=1):
  """Build the graph required to fetch the attention weights.
  Args:
    hparams_set: HParams set to build the model with.
    model_name: Name of model.
    data_dir: Path to directory containing training data.
    problem_name: Name of problem.
    beam_size: (Optional) Number of beams to use when decoding the output.
        If set to 1 (default) then greedy decoding is used.
  Returns:
    Tuple of (
        inputs: Input placeholder to feed in ids of the input.
        targets: Targets placeholder to feed to label when fetching
            attention weights.
        samples: Tensor representing the ids of the label.
        att_mats: Tensors representing the attention weights.
    )
  """
  hparams = trainer_lib.create_hparams(
      hparams_set, data_dir=data_dir, problem_name=problem_name)
  model = registry.model(model_name)(
      hparams, tf.estimator.ModeKeys.EVAL)

  inputs = tf.placeholder(tf.int32, shape=(1, None, 1, 1), name='inputs')
  targets = tf.placeholder(tf.int32, shape=(1, 1, 1, 1), name='targets')
  model({
      'inputs': inputs,
      'targets': targets,
  })

  # Must be called after building the training graph, so that the dict will
  # have been filled with the attention tensors. BUT before creating the
  # inference graph otherwise the dict will be filled with tensors from
  # inside a tf.while_loop from decoding and are marked unfetchable.
  att_mats = get_att_mats(model)

  with tf.variable_scope(tf.get_variable_scope(), reuse=True):
    samples = model.infer({
        'inputs': inputs,
    }, beam_size=beam_size)['outputs']

  return inputs, targets, samples, att_mats


def get_att_mats(model):
  """Get's the tensors representing the attentions from a build model.
  The attentions are stored in a dict on the Transformer object while building
  the graph.
  Args:
    model: Model object to fetch the attention weights from.
  Returns:
  Tuple of attention matrices; (
      enc_atts: Encoder self attention weights.
        A list of `num_layers` numpy arrays of size
        (batch_size, num_heads, inp_len, inp_len)
  )
  """
  enc_atts = []

#   prefix = 'transformer_encoder/body/'
#   postfix = '/multihead_attention/dot_product_attention'

  prefix = 'transformer_encoder/parallel_0_4/transformer_encoder/transformer_encoder/body/'
  postfix = '/multihead_attention/dot_product_attention/attention_weights:0'

  for i in range(model.hparams.num_hidden_layers):
    enc_att = tf.get_default_graph().get_tensor_by_name('%sencoder/layer_%i/self_attention%s' % (prefix, i, postfix))
#     model.attention_weights['%sencoder/layer_%i/self_attention%s' % (prefix, i, postfix)]
    enc_atts.append(enc_att)

  return enc_atts

## Visualization

In [6]:
tf.reset_default_graph()
visualizer = AttentionVisualizer(hparams_set, model_name, data_dir, problem_name, beam_size=1)

INFO:tensorflow:Unsetting shared_embedding_and_softmax_weights.
INFO:tensorflow:Setting T2TModel mode to 'eval'
INFO:tensorflow:Setting hparams.dropout to 0.0
INFO:tensorflow:Setting hparams.label_smoothing to 0.0
INFO:tensorflow:Setting hparams.layer_prepostprocess_dropout to 0.0
INFO:tensorflow:Setting hparams.symbol_dropout to 0.0
INFO:tensorflow:Setting hparams.attention_dropout to 0.0
INFO:tensorflow:Setting hparams.relu_dropout to 0.0
INFO:tensorflow:Using variable initializer: uniform_unit_scaling
INFO:tensorflow:Transforming feature 'inputs' with symbol_modality_12_256.bottom
INFO:tensorflow:Transforming 'targets' with class_label_modality_2_256.targets_bottom
INFO:tensorflow:Building model body
MOSTAFA
Tensor("transformer_encoder/parallel_0_4/transformer_encoder/transformer_encoder/body/encoder/layer_0/self_attention/multihead_attention/dot_product_attention/attention_weights:0", shape=(1, 4, ?, ?), dtype=float32)
MOSTAFA
Tensor("transformer_encoder/parallel_0_4/transformer_en

In [7]:
tf.Variable(0, dtype=tf.int64, trainable=False, name='global_step')

sess = tf.train.MonitoredTrainingSession(
    checkpoint_dir=CHECKPOINT,
    save_summaries_secs=0,
)

INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /Users/mostafa/Desktop/EarlyStopReader/algorithmic/output_transformer/model.ckpt-9000000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 9000000 into /Users/mostafa/Desktop/EarlyStopReader/algorithmic/output_transformer/model.ckpt.


In [15]:

"""Module for postprocessing and displaying tranformer attentions.

This module is designed to be called from an ipython notebook.
"""

import json
import os

import IPython.display as display

import numpy as np

vis_html = """
  <span style="user-select:none">
    Layer: <select id="layer"></select>
    Attention: <select id="att_type">
      <option value="inp_inp">Input - Input</option>
    </select>
  </span>
  <div id='vis'></div>
"""


# __location__ = os.path.realpath(
#     os.path.join(os.getcwd(), os.path.dirname(__file__)))
# vis_js = open(os.path.join(__location__, 'attention.js')).read()

vis_js = tf.gfile.Open('attention.js').read()


def pad_remover(attention):
  inp_inp_atts = attention['inp_inp']
  att_array = np.array(inp_inp_atts['att'])
  top = inp_inp_atts['top_text']
  bot = inp_inp_atts['bot_text']
  pad_index = [ i for i, sent in enumerate(top) if sent.startswith(PAD)]
  start = min(pad_index)
  end = max(pad_index)
  filtered_att_d2 = np.concatenate((att_array[:,:,:start, :],att_array[:,:,end+1:, :]), axis=2)
  filtered_att = np.concatenate((filtered_att_d2[:,:,:, :start],filtered_att_d2[:,:,:, end+1:]), axis=3)
  filteredtop = top[:start] + top[end+1:]
  filteredbot = bot[:start] + bot[end+1:]
  filteredtop = [sent.replace(PAD, '').strip() for sent in filteredtop]
  filteredbot = [sent.replace(PAD, '').strip() for sent in filteredbot]
  inp_inp_atts['att'] = filtered_att.tolist()
  inp_inp_atts['top_text'] = filteredtop
  inp_inp_atts['bot_text'] = filteredbot
  attention['inp_inp'] = inp_inp_atts
  return attention
  
def show(inp_text, out_text, enc_atts):
  enc_att = resize(enc_atts)
  attention = _get_attention(
      inp_text, out_text, enc_att)
#   attention = pad_remover(attention)
  att_json = json.dumps(attention)
  _show_attention(att_json)
  return attention


def _show_attention(att_json):
  display.display(display.HTML(vis_html))
  display.display(display.Javascript('window.attention = %s' % att_json))
  display.display(display.Javascript(vis_js))


def resize(att_mat, max_length=None):
  """Normalize attention matrices and reshape as necessary."""
  for i, att in enumerate(att_mat):
    # Add extra batch dim for viz code to work.
    if att.ndim == 3:
      att = np.expand_dims(att, axis=0)
    if max_length is not None:
      # Sum across different attention values for each token.
      att = att[:, :, :max_length, :max_length]
      row_sums = np.sum(att, axis=2)
      # Normalize
      att /= row_sums[:, :, np.newaxis]
    att_mat[i] = att
  return att_mat


def _get_attention(inp_text, out_text, enc_atts):
  """Compute representation of the attention ready for the d3 visualization.

  Args:
    inp_text: list of strings, words to be displayed on the left of the vis
    out_text: list of strings, words to be displayed on the right of the vis
    enc_atts: numpy array, encoder self-attentions
        [num_layers, batch_size, num_heads, enc_length, enc_length]

  Returns:
    Dictionary of attention representations with the structure:
    {
      'inp_inp': Representations for showing encoder self-attentions
    }
    and each sub-dictionary has structure:
    {
      'att': list of inter attentions matrices, one for each attention head
      'top_text': list of strings, words to be displayed on the left of the vis
      'bot_text': list of strings, words to be displayed on the right of the vis
    }
  """

  def get_inp_inp_attention(layer):
    att = np.transpose(enc_atts[layer][0], (0, 2, 1))
    return [ha.T.tolist() for ha in att]

  def get_attentions(get_attention_fn):
    num_layers = len(enc_atts)
    attentions = []
    for i in range(num_layers):
      attentions.append(get_attention_fn(i))

    return attentions

  attentions = {
      'inp_inp': {
          'att': get_attentions(get_inp_inp_attention),
          'top_text': inp_text,
          'bot_text': inp_text,
      },
  }

  return attentions

In [40]:
input_string='1222223344555555777777777888888889999999999999999'
input_symbole='6'

In [41]:
input_string = [int(i) for i in list(input_string)]
output, input_list, att_mats = visualizer.get_vis_data_from_string(sess, list(input_string), int(input_symbole))

print(output)
# print(np.array(att_mats).shape)


0


In [42]:
inp_text = ["input symbol: %s" %input_list[0]] + input_list[1:]
out_text = [output]


# def import_js_deps():
#   publish.script_url("https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.5/require.min.js")

#   publish.javascript('''
#   requirejs.config({
#       "paths": {
#         "d3": "https://cdnjs.cloudflare.com/ajax/libs/d3/3.5.8/d3.min",
#         "jquery": "//ajax.googleapis.com/ajax/libs/jquery/2.0.0/jquery.min",
#       }
#   });
#   ''')

# import_js_deps()

attention = show(inp_text, out_text, att_mats)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>