In [1]:
import os
import json

import tensorflow as tf
import numpy as np

from tensor2tensor import problems
from tensor2tensor import models
from tensor2tensor.bin import t2t_decoder  # To register the hparams set
from tensor2tensor.utils import registry
from tensor2tensor.utils import trainer_lib
from tensor2tensor.google.data_generators import babi_qa

ModuleNotFoundError: No module named 'tensorflow'

In [2]:
%%javascript
require.config({
  paths: {
      d3: '//cdnjs.cloudflare.com/ajax/libs/d3/3.4.8/d3.min'
  }
});

<IPython.core.display.Javascript at 0xae2b050>

## HParams

In [0]:
# HParams
babi_task_id = 'qa3'
subset = "10k"
problem_name = 'babi_qa_sentence_task' + babi_task_id.replace("qa", "") + "_" + subset
model_name = "babi_r_transformer"
hparams_set = "r_transformer_act_step_position_timing_tiny"

data_dir = '~/babi/data/' + problem_name 

# PUT THE MODEL YOU WANT TO LOAD HERE!
CHECKPOINT = '~/babi/output/' + problem_name+ '/' + model_name +  '/' + hparams_set + '/'
print(CHECKPOINT)


In [4]:
_TASKS = {
      'qa1': 'qa1_single-supporting-fact',
      'qa2': 'qa2_two-supporting-facts',
      'qa3': 'qa3_three-supporting-facts',
      'qa4': 'qa4_two-arg-relations',
      'qa5': 'qa5_three-arg-relations',
      'qa6': 'qa6_yes-no-questions',
      'qa7': 'qa7_counting',
      'qa8': 'qa8_lists-sets',
      'qa9': 'qa9_simple-negation',
      'qa10': 'qa10_indefinite-knowledge',
      'qa11': 'qa11_basic-coreference',
      'qa12': 'qa12_conjunction',
      'qa13': 'qa13_compound-coreference',
      'qa14': 'qa14_time-reasoning',
      'qa15': 'qa15_basic-deduction',
      'qa16': 'qa16_basic-induction',
      'qa17': 'qa17_positional-reasoning',
      'qa18': 'qa18_size-reasoning',
      'qa19': 'qa19_path-finding',
      'qa20': 'qa20_agents-motivations'
  }

meta_data_filename = _TASKS[babi_task_id] + '-meta_data.json'
metadata_path = os.path.join(data_dir, meta_data_filename)


FLAGS = tf.flags.FLAGS
FLAGS.data_dir = data_dir

truncated_story_length = 130 if babi_task_id == 'qa3' else 70

with tf.gfile.GFile(metadata_path, mode='r') as f:
  metadata = json.load(f)
max_story_length = metadata['max_story_length']
max_sentence_length = metadata['max_sentence_length']
max_question_length = metadata['max_question_length']

print(max_story_length)
print(max_sentence_length)
print(max_question_length)

70
7
5


In [0]:
tf.reset_default_graph()

class bAbiAttentionVisualizer(object):
  """Helper object for creating Attention visualizations."""

  def __init__(
      self, hparams_set, model_name, data_dir, problem_name, beam_size=1):
    story, question, targets, samples, att_mats = build_model(
        hparams_set, model_name, data_dir, problem_name, beam_size=beam_size)

    # Fetch the problem
    babi_problem = problems.problem(problem_name)
    encoders = babi_problem.feature_encoders(data_dir)

    self.story = story
    self.question = question
    self.targets = targets
    self.att_mats = att_mats
    self.samples = samples
    self.encoders = encoders

  def encode(self, story_str, question_str):
    """Input str to features dict, ready for inference."""
    
    story_str = babi_qa._normalize_string(story_str)
    question_str = babi_qa._normalize_string(question_str)
    story = story_str.strip().split('.')
    story = [self.encoders[babi_qa.FeatureNames.STORY].encode(sentence) 
                       for sentence in story[-truncated_story_length:]]
    question = self.encoders[babi_qa.FeatureNames.QUESTION].encode(question_str)
     
    for sentence in story:
      for _ in range(max_sentence_length - len(sentence)):
        sentence.append(babi_qa.PAD)
      assert len(sentence) == max_sentence_length

    for _ in range(max_story_length - len(story)):
      story.append([babi_qa.PAD for _ in range(max_sentence_length)])

    for _ in range(max_question_length - len(question)):
      question.append(babi_qa.PAD)

    assert len(story) == max_story_length
    assert len(question) == max_question_length   

    story_flat = [token_id for sentence in story for token_id in sentence]
    
    batch_story = np.reshape(np.array(story_flat), 
                             [1, max_story_length, max_sentence_length, 1])
    batch_question = np.reshape(np.array(question), 
                         [1, 1, max_question_length, 1])
    return batch_story, batch_question

  def decode_story(self, integers):
    """List of ints to str."""
    integers = np.squeeze(integers).tolist()
    story = []
    for sent in integers:
      sent_decoded = self.encoders[babi_qa.FeatureNames.STORY].decode_list(sent)
      sent_decoded.append('.')
      story.append(sent_decoded)
    return story
  
  def decode_question(self, integers):
    """List of ints to str."""
    integers = np.squeeze(integers).tolist()
    return self.encoders[babi_qa.FeatureNames.QUESTION].decode_list(integers)
  
  def decode_targets(self, integers):
    """List of ints to str."""
    integers = np.squeeze(integers).tolist()
    return self.encoders["targets"].decode(integers)

  def get_vis_data_from_string(self, sess, story_str, question_str):
    """Constructs the data needed for visualizing attentions.

    Args:
      sess: A tf.Session object.
      input_string: The input setence to be visulized.

    Returns:
      Tuple of (
          output_string: The answer
          input_list: Tokenized input sentence.
          output_list: Tokenized answer.
          att_mats: Tuple of attention matrices; (
              enc_atts: Encoder self attention weights.
                A list of `num_layers` numpy arrays of size
                (batch_size, num_heads, inp_len, inp_len)

          )
    """
    encoded_story, encoded_question = self.encode(story_str, question_str)

    # Run inference graph to get the label.
    out = sess.run(self.samples, {
        self.story: encoded_story,
        self.question: encoded_question,
    })

    # Run the decoded answer through the training graph to get the
    # attention tensors.
    att_mats = sess.run(self.att_mats, {
        self.story: encoded_story,
        self.question: encoded_question,
        self.targets: np.reshape(out, [1, -1, 1, 1]),
    })
    
    output = self.decode_targets(out)
    story_list = self.decode_story(encoded_story)
    question_list = self.decode_question(encoded_question)
    
    return story_list, question_list, output, att_mats


def build_model(hparams_set, model_name, data_dir, problem_name, beam_size=1):
  """Build the graph required to featch the attention weights.

  Args:
    hparams_set: HParams set to build the model with.
    model_name: Name of model.
    data_dir: Path to directory contatining training data.
    problem_name: Name of problem.
    beam_size: (Optional) Number of beams to use when decoding a traslation.
        If set to 1 (default) then greedy decoding is used.

  Returns:
    Tuple of (
        inputs: Input placeholder to feed in ids.
        targets: Targets placeholder to feed to th when fetching
            attention weights.
        samples: Tensor representing the ids of the translation.
        att_mats: Tensors representing the attention weights.
    )
  """
  hparams = trainer_lib.create_hparams(
      hparams_set, data_dir=data_dir, problem_name=problem_name)
  babi_model = registry.model(model_name)(
      hparams, tf.estimator.ModeKeys.EVAL)
          
  story = tf.placeholder(tf.int32, shape=(
      1, max_story_length, max_sentence_length, 1), 
                            name=babi_qa.FeatureNames.STORY)
  question = tf.placeholder(tf.int32, shape=(
      1, 1, max_question_length, 1), 
                            name=babi_qa.FeatureNames.QUESTION)
  targets = tf.placeholder(tf.int32, shape=(1, 1, 1, 1), name='targets')
  
  babi_model({
      babi_qa.FeatureNames.STORY: story,
      babi_qa.FeatureNames.QUESTION: question,
      'targets': targets,
  })

  # Must be called after building the training graph, so that the dict will
  # have been filled with the attention tensors. BUT before creating the
  # interence graph otherwise the dict will be filled with tensors from
  # inside a tf.while_loop from decoding and are marked unfetchable.
  att_mats = get_att_mats(babi_model)

  with tf.variable_scope(tf.get_variable_scope(), reuse=True):
    samples = babi_model.infer({
       babi_qa.FeatureNames.STORY: story,
       babi_qa.FeatureNames.QUESTION: question,
    }, beam_size=beam_size)['outputs']

  return story, question, targets, samples, att_mats


def get_att_mats(babi_model):
  """Get's the tensors representing the attentions from a build model.

  The attentions are stored in a dict on the Transformer object while building
  the graph.

  Args:
    babi_model: Transformer object to fetch the attention weights from.

  Returns:
  Tuple of attention matrices; (
      enc_atts: Encoder self attention weights.
        A list of `num_layers` numpy arrays of size
        (batch_size, num_heads, inp_len, inp_len)
  )
  """
  enc_atts = []
  
  
  prefix = model_name + '/parallel_0_6/'+ model_name + '/body'
  postfix = 'multihead_attention/dot_product_attention/attention_weights:0'
 
  for i in range(babi_model.hparams.num_hidden_layers):
#     print(babi_model.attention_weights)
    layer = 'layer' if i==0 else 'layer_{}'.format(i)
  babi_model.
    enc_att = tf.get_default_graph().get_tensor_by_name(
        '%s/encoder/%s/self_attention/%s' % (prefix,layer, postfix))
    print(enc_att)
    enc_atts.append(enc_att)
    
    
#   prefix = model_name + '/body'
#   postfix = 'multihead_attention/dot_product_attention'
#   for i in range(babi_model.hparams.num_hidden_layers):
#     print(babi_model.attention_weights)
#     layer = 'layer' # if i==0 else 'layer_{}'.format(i)
#     enc_att = babi_model.attention_weights[
#         '%s/encoder/%s/self_attention/%s' % (prefix,layer, postfix)]
#     enc_atts.append(enc_att)

  return enc_atts

## Visualization

In [6]:
visualizer = bAbiAttentionVisualizer(hparams_set, model_name, data_dir, problem_name, beam_size=1)

[2018-03-09 16:28:04,108] Setting T2TModel mode to 'eval'
[2018-03-09 16:28:04,109] Setting hparams.dropout to 0.0
[2018-03-09 16:28:04,110] Setting hparams.layer_prepostprocess_dropout to 0.0
[2018-03-09 16:28:04,111] Setting hparams.attention_dropout to 0.0
[2018-03-09 16:28:04,111] Setting hparams.symbol_dropout to 0.0
[2018-03-09 16:28:04,112] Setting hparams.relu_dropout to 0.0
[2018-03-09 16:28:04,116] Using variable initializer: uniform_unit_scaling
[2018-03-09 16:28:04,149] Transforming feature 'question' with symbol_modality_40_128.bottom
[2018-03-09 16:28:04,249] Transforming feature 'story' with symbol_modality_40_128.bottom
[2018-03-09 16:28:04,257] Transforming 'targets' with class_label_modality_40_128.targets_bottom
[2018-03-09 16:28:04,260] Building model body
[2018-03-09 16:28:06,477] From /google/src/cloud/dehghani/t2t_babi/google3/blaze-bin/third_party/py/tensor2tensor/google/colab.runfiles/google3/third_party/py/tensor2tensor/layers/common_layers.py:498: calling red

Tensor("babi2l_transformer_encoder_sentence_share_layer_params/parallel_0_6/babi2l_transformer_encoder_sentence_share_layer_params/body/encoder/layer/self_attention/multihead_attention/dot_product_attention/attention_weights:0", shape=(1, 4, 71, 71), dtype=float32)
Tensor("babi2l_transformer_encoder_sentence_share_layer_params/parallel_0_6/babi2l_transformer_encoder_sentence_share_layer_params/body/encoder/layer_1/self_attention/multihead_attention/dot_product_attention/attention_weights:0", shape=(1, 4, 71, 71), dtype=float32)
Tensor("babi2l_transformer_encoder_sentence_share_layer_params/parallel_0_6/babi2l_transformer_encoder_sentence_share_layer_params/body/encoder/layer_2/self_attention/multihead_attention/dot_product_attention/attention_weights:0", shape=(1, 4, 71, 71), dtype=float32)
Tensor("babi2l_transformer_encoder_sentence_share_layer_params/parallel_0_6/babi2l_transformer_encoder_sentence_share_layer_params/body/encoder/layer_3/self_attention/multihead_attention/dot_product

In [7]:
tf.Variable(0, dtype=tf.int64, trainable=False, name='global_step')

sess = tf.train.MonitoredTrainingSession(
    checkpoint_dir=CHECKPOINT,
    save_summaries_secs=0,
)

[2018-03-09 16:28:09,307] Create CheckpointSaverHook.
[2018-03-09 16:28:14,486] Graph was finalized.
[2018-03-09 16:28:17,947] Restoring parameters from /cns/vz-d/home/dehghani/babi/output/en-10k_qa2/token/babi_qa_as_storysentquestion2label/babi2l_transformer_encoder_sentence_share_layer_params_transformer_tiny_bs_1024_4hl/model.ckpt-248038
[2018-03-09 16:28:27,493] Running local_init_op.
[2018-03-09 16:28:27,508] Done running local_init_op.


In [0]:

"""Module for postprocessing and displaying tranformer attentions.

This module is designed to be called from an ipython notebook.
"""

import json
import os

import IPython.display as display

import numpy as np

vis_html = """
  <span style="user-select:none">
    Layer: <select id="layer"></select>
    Attention: <select id="att_type">
      <option value="inp_inp">Input - Input</option>
    </select>
  </span>
  <div id='vis'></div>
"""


# __location__ = os.path.realpath(
#     os.path.join(os.getcwd(), os.path.dirname(__file__)))
# vis_js = open(os.path.join(__location__, 'attention.js')).read()

vis_js = tf.gfile.Open('attention.js').read()


def pad_remover(attention):
  inp_inp_atts = attention['inp_inp']
  att_array = np.array(inp_inp_atts['att'])
  top = inp_inp_atts['top_text']
  bot = inp_inp_atts['bot_text']
  pad_index = [ i for i, sent in enumerate(top) if sent.startswith('<pad>')]
  start = min(pad_index)
  end = max(pad_index)
  filtered_att_d2 = np.concatenate((att_array[:,:,:start, :],att_array[:,:,end+1:, :]), axis=2)
  filtered_att = np.concatenate((filtered_att_d2[:,:,:, :start],filtered_att_d2[:,:,:, end+1:]), axis=3)
  filteredtop = top[:start] + top[end+1:]
  filteredbot = bot[:start] + bot[end+1:]
  filteredtop = [sent.replace('<pad> ', '') for sent in filteredtop]
  filteredbot = [sent.replace('<pad> ', '') for sent in filteredbot]
  inp_inp_atts['att'] = filtered_att.tolist()
  inp_inp_atts['top_text'] = filteredtop
  inp_inp_atts['bot_text'] = filteredbot
  attention['inp_inp'] = inp_inp_atts
  return attention
  
def show(inp_text, out_text, enc_atts):
  enc_att = resize(enc_atts)
  attention = _get_attention(
      inp_text, out_text, enc_att)
  attention = pad_remover(attention)
  att_json = json.dumps(attention)
  _show_attention(att_json)
  return attention


def _show_attention(att_json):
  display.display(display.HTML(vis_html))
  display.display(display.Javascript('window.attention = %s' % att_json))
  display.display(display.Javascript(vis_js))


def resize(att_mat, max_length=None):
  """Normalize attention matrices and reshape as necessary."""
  for i, att in enumerate(att_mat):
    # Add extra batch dim for viz code to work.
    if att.ndim == 3:
      att = np.expand_dims(att, axis=0)
    if max_length is not None:
      # Sum across different attention values for each token.
      att = att[:, :, :max_length, :max_length]
      row_sums = np.sum(att, axis=2)
      # Normalize
      att /= row_sums[:, :, np.newaxis]
    att_mat[i] = att
  return att_mat


def _get_attention(inp_text, out_text, enc_atts):
  """Compute representation of the attention ready for the d3 visualization.

  Args:
    inp_text: list of strings, words to be displayed on the left of the vis
    out_text: list of strings, words to be displayed on the right of the vis
    enc_atts: numpy array, encoder self-attentions
        [num_layers, batch_size, num_heads, enc_length, enc_length]

  Returns:
    Dictionary of attention representations with the structure:
    {
      'inp_inp': Representations for showing encoder self-attentions
    }
    and each sub-dictionary has structure:
    {
      'att': list of inter attentions matrices, one for each attention head
      'top_text': list of strings, words to be displayed on the left of the vis
      'bot_text': list of strings, words to be displayed on the right of the vis
    }
  """

  def get_inp_inp_attention(layer):
    att = np.transpose(enc_atts[layer][0], (0, 2, 1))
    return [ha.T.tolist() for ha in att]

  def get_attentions(get_attention_fn):
    num_layers = len(enc_atts)
    attentions = []
    for i in range(num_layers):
      attentions.append(get_attention_fn(i))

    return attentions

  attentions = {
      'inp_inp': {
          'att': get_attentions(get_inp_inp_attention),
          'top_text': inp_text,
          'bot_text': inp_text,
      },
  }

  return attentions

In [9]:
if babi_task_id == 'qa1':
#   input_story = "John travelled to the hallway.Mary journeyed to the bathroom."
#   input_question = "Where is John?" #hallway
  
  input_story = "John travelled to the hallway.Mary journeyed to the bathroom.Daniel went back to the bathroom.John moved to the bedroom."
  input_question = "Where is Mary?" #bathroom

elif babi_task_id == 'qa2':
  input_story = "Mary got the milk there.John moved to the bedroom.Sandra went back to the kitchen.Mary travelled to the hallway."
  input_question = "Where is the milk?" #hallway
  
#   input_story = "Mary got the milk there.John moved to the bedroom.Sandra went back to the kitchen.Mary travelled to the hallway.John got the football there.John went to the hallway."
#   input_question = "Where is the football?" #hallway

elif babi_task_id == 'qa3':
  input_story = "Mary got the milk.John moved to the bedroom.Daniel journeyed to the office.John grabbed the apple there.John got the football.John journeyed to the garden.Mary left the milk.John left the football.Daniel moved to the garden.Daniel grabbed the football.Mary moved to the hallway.Mary went to the kitchen.John put down the apple there.John picked up the apple.Sandra moved to the hallway.Daniel left the football there.Daniel took the football.John travelled to the kitchen.Daniel dropped the football.John dropped the apple.John grabbed the apple.John went to the office.Sandra went back to the bedroom.Sandra took the milk.John journeyed to the bathroom.John travelled to the office.Sandra left the milk.Mary went to the bedroom.Mary moved to the office.John travelled to the hallway.Sandra moved to the garden.Mary moved to the kitchen.Daniel took the football.Mary journeyed to the bedroom.Mary grabbed the milk there.Mary discarded the milk.John went to the garden.John discarded the apple there."
  input_question = "Where was the apple before the bathroom?" #office
  
#   input_story = "Mary got the milk.John moved to the bedroom.Daniel journeyed to the office.John grabbed the apple there.John got the football.John journeyed to the garden.Mary left the milk.John left the football.Daniel moved to the garden.Daniel grabbed the football.Mary moved to the hallway.Mary went to the kitchen.John put down the apple there.John picked up the apple.Sandra moved to the hallway.Daniel left the football there.Daniel took the football.John travelled to the kitchen.Daniel dropped the football.John dropped the apple.John grabbed the apple.John went to the office.Sandra went back to the bedroom.Sandra took the milk.John journeyed to the bathroom.John travelled to the office.Sandra left the milk.Mary went to the bedroom.Mary moved to the office.John travelled to the hallway.Sandra moved to the garden.Mary moved to the kitchen.Daniel took the football.Mary journeyed to the bedroom.Mary grabbed the milk there.Mary discarded the milk.John went to the garden.John discarded the apple there.Sandra travelled to the bedroom.Daniel moved to the bathroom."
#   input_question = "Where was the apple before the hallway?" #office
  
  
story_text, question_text, output, att_mats = visualizer.get_vis_data_from_string(sess, input_story, input_question)
print(output)
# print(story_text)
# print(question_text)
print(np.array(att_mats).shape)


[2018-03-09 16:28:34,099] Saving checkpoints for 248038 into /cns/vz-d/home/dehghani/babi/output/en-10k_qa2/token/babi_qa_as_storysentquestion2label/babi2l_transformer_encoder_sentence_share_layer_params_transformer_tiny_bs_1024_4hl/model.ckpt.


hallway
(4, 1, 4, 71, 71)


In [10]:
inp_text = []
for sent in story_text:
  inp_text.append(' '.join(sent))
inp_text.append(' '.join(question_text))
out_text = [output]

from colabtools import publish
def import_js_deps():
  publish.script_url("https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.5/require.min.js")

  publish.javascript('''
  requirejs.config({
      "paths": {
        "d3": "https://cdnjs.cloudflare.com/ajax/libs/d3/3.5.8/d3.min",
        "jquery": "//ajax.googleapis.com/ajax/libs/jquery/2.0.0/jquery.min",
      }
  });
  ''')

import_js_deps()
attention = show(inp_text, out_text, att_mats)

<IPython.core.display.Javascript at 0x7f6a16d8fe50>

<IPython.core.display.Javascript at 0x7f6a16d8fe90>