In [1]:
import os

import tensorflow as tf
import numpy as np

from tensor2tensor import problems
from tensor2tensor.bin import t2t_decoder  # To register the hparams set
from tensor2tensor.utils import registry
from tensor2tensor.utils import trainer_lib
from tensor2tensor.visualization import attention
from tensor2tensor.visualization import visualization
from tensor2tensor.utils import usr_dir
from itertools import groupby

  from ._conv import register_converters as _register_converters


In [2]:
# Configuration cell: edit these values to point at the model you want to load.
# CHECKPOINT is the checkpoint *directory*; it is passed to `_load_model` as
# `model_dir` further down in this notebook.
CHECKPOINT = "../checkpoints/word_to_phonetic_vocab/transformer-transformer_base_single_gpu-en-best_model"

# Names used to rebuild the graph: they are forwarded to `build_model`.
problem_name = 'word_to_phonetic_vocab'
data_dir = "../data_dir/en"
model_name = "transformer"
hparams_set = "transformer_base_single_gpu"

# Import the user module so that the word-to-phonetic problem is added to the
# problem list before the model is built.
submodule_dir = "../submodule"
usr_dir.import_usr_dir(submodule_dir)

INFO:tensorflow:Importing user module submodule from path /home/olivier/Bureau/Transformer_test


In [3]:
def get_att_mats(translate_model):
  """Collects the attention tensors recorded on an already built model.

  While the graph is being built, the Transformer object stores its attention
  tensors in its `attention_weights` dict. This function looks them up, layer
  by layer, for every one of the `hparams.num_hidden_layers` layers.

  Args:
    translate_model: Transformer object to fetch the attention weights from.

  Returns:
    A tuple of three lists, each holding one entry per hidden layer:
      enc_atts: Encoder self-attention weights, of size
        (batch_size, num_heads, inp_len, inp_len).
      dec_atts: Decoder self-attention weights, of size
        (batch_size, num_heads, out_len, out_len).
      encdec_atts: Encoder-decoder attention weights, of size
        (batch_size, num_heads, out_len, inp_len).
  """
  scope_root = 'transformer/body/'
  att_scope = '/multihead_attention/dot_product_attention'

  def fetch(key_format, layer):
    # The dict is read lazily, only at the point where a key is needed.
    return translate_model.attention_weights[
        key_format % (scope_root, layer, att_scope)]

  # One (encoder, decoder, encoder-decoder) triple per layer, looked up in
  # that order.
  per_layer = [
      (fetch('%sencoder/layer_%i/self_attention%s', layer),
       fetch('%sdecoder/layer_%i/self_attention%s', layer),
       fetch('%sdecoder/layer_%i/encdec_attention%s', layer))
      for layer in range(translate_model.hparams.num_hidden_layers)
  ]

  enc_atts = [enc for enc, _, _ in per_layer]
  dec_atts = [dec for _, dec, _ in per_layer]
  encdec_atts = [encdec for _, _, encdec in per_layer]
  return enc_atts, dec_atts, encdec_atts


In [4]:
def build_model(hparams_set, model_name, data_dir, problem_name, beam_size=1):
  """Builds the graph needed to decode and to fetch the attention weights.

  Args:
    hparams_set: HParams set to build the model with.
    model_name: Name of the registered model.
    data_dir: Path to the directory containing the training data.
    problem_name: Name of the problem.
    beam_size: (Optional) Number of beams to use when decoding a translation.
        With the default of 1, greedy decoding is used.

  Returns:
    Tuple of (
        inputs: Placeholder for the ids to be translated.
        targets: Placeholder for the translation ids to feed when fetching
            the attention weights.
        samples: Tensor holding the ids of the decoded translation.
        att_mats: Tensors holding the attention weights, as returned by
            `get_att_mats`.
    )
  """
  model_hparams = trainer_lib.create_hparams(
      hparams_set, data_dir=data_dir, problem_name=problem_name)
  model_cls = registry.model(model_name)
  translate_model = model_cls(model_hparams, tf.estimator.ModeKeys.EVAL)

  ids_shape = (None, None, 1, 1)
  inputs = tf.placeholder(tf.int32, shape=ids_shape, name='inputs')
  targets = tf.placeholder(tf.int32, shape=ids_shape, name='targets')

  # First pass: build the graph that consumes both inputs and targets. This
  # is what fills the model's attention dict.
  translate_model(dict(inputs=inputs, targets=targets))

  # Ordering matters. The attention tensors have to be collected after the
  # pass above (so the dict is populated) and before the inference graph is
  # created; otherwise the dict would hold tensors living inside the decoding
  # tf.while_loop, which are marked unfetchable.
  att_mats = get_att_mats(translate_model)

  # Second pass: the inference graph, reusing the variables created above.
  with tf.variable_scope(tf.get_variable_scope(), reuse=True):
    decoded = translate_model.infer(dict(inputs=inputs), beam_size=beam_size)
  samples = decoded['outputs']

  return inputs, targets, samples, att_mats

In [8]:
def export_model(saved_model_dir, layers=(0, 4, 5)):
    """Export the restored model as a SavedModel with two predict signatures.

    This function reads the notebook-level globals `inputs`, `targets`,
    `samples`, `att_mats` and `sess`, so the cell that builds the model and
    restores the checkpoint must have been run first.

    Exported signatures:
      'get_phon':     {'input': inputs} -> {'phon': samples}
      'get_att_mats': {'input': inputs, 'phon': targets} ->
                      {'att_mat_inp_out_layer_<i>': ...} for each i in `layers`,
                      where each output is `att_mats[2][i]` summed over axis 1
                      and squeezed.

    Args:
      saved_model_dir: Directory handed to `SavedModelBuilder`.
      layers: Indices into `att_mats[2]` of the layers whose attention is
        exported. Defaults to (0, 4, 5), the layers that were previously
        hard-coded, so existing callers get the same signature keys.

    Raises:
      IndexError: If a requested layer index is outside `att_mats[2]`.
    """
    tensor_info = tf.saved_model.utils.build_tensor_info
    build_signature = tf.saved_model.signature_def_utils.build_signature_def
    predict_method = tf.saved_model.signature_constants.PREDICT_METHOD_NAME

    encdec_atts = att_mats[2]
    layers = list(layers)
    for layer in layers:
        if not 0 <= layer < len(encdec_atts):
            raise IndexError(
                'Cannot export attention of layer %d: the model only has %d '
                'layer(s).' % (layer, len(encdec_atts)))

    # Create the reduced attention tensors first, in the requested order.
    # NOTE: tf.squeeze without `axis` drops every size-1 dimension; this is
    # kept as-is so consumers of an existing export see the same outputs.
    att_tensors = [
        ('att_mat_inp_out_layer_%d' % layer,
         tf.squeeze(tf.reduce_sum(encdec_atts[layer], axis=1)))
        for layer in layers
    ]

    signature_get_phon = build_signature(
        inputs={'input': tensor_info(inputs)},
        outputs={'phon': tensor_info(samples)},
        method_name=predict_method
    )

    signature_get_att_mats = build_signature(
        inputs={'input': tensor_info(inputs),
                'phon': tensor_info(targets)},
        outputs={key: tensor_info(tensor) for key, tensor in att_tensors},
        method_name=predict_method
    )

    legacy_init_op = tf.group(tf.tables_initializer(), name='legacy_init_op')

    # Save out the SavedModel.
    builder = tf.saved_model.builder.SavedModelBuilder(saved_model_dir)
    builder.add_meta_graph_and_variables(
        sess, [tf.saved_model.tag_constants.SERVING],
        signature_def_map={
            'get_phon': signature_get_phon,
            'get_att_mats': signature_get_att_mats
        },
        legacy_init_op=legacy_init_op)
    builder.save()
        
def _load_model(model_dir, saver, sess):
    """Restore the checkpoint recorded in `model_dir` into `sess`.

    Args:
      model_dir: Directory passed to `tf.train.get_checkpoint_state`.
      saver: Object whose `restore(sess, path)` method loads the variables.
      sess: Session the variables are restored into.

    Returns:
      `(True, counter)` when a checkpoint was found and restored, where
      `counter` is the last run of digits in the checkpoint file name (0 when
      the name contains no digits); `(False, 0)` when no checkpoint was found.
    """
    # Local import: `re` is only needed here.
    import re

    ckpt = tf.train.get_checkpoint_state(model_dir)
    if not (ckpt and ckpt.model_checkpoint_path):
        return False, 0

    # Only the file name of the recorded path is used; it is resolved against
    # `model_dir` again.
    ckpt_name = os.path.basename(ckpt.model_checkpoint_path)
    saver.restore(sess, os.path.join(model_dir, ckpt_name))

    # Last group of digits in the name, e.g. "model.ckpt-11000" -> 11000.
    # A raw string avoids the invalid "\d" escape, and `re.search` returns
    # None (instead of raising StopIteration) when there are no digits.
    step_match = re.search(r"(\d+)(?!.*\d)", ckpt_name)
    counter = int(step_match.group(0)) if step_match else 0
    return True, counter

In [6]:
# Build the graph with a beam size of 5. The resulting names are
# module-level on purpose: `export_model` reads `inputs`, `targets`,
# `samples`, `att_mats` and `sess` as globals.
inputs, targets, samples, att_mats = build_model(hparams_set, model_name, data_dir, problem_name, beam_size = 5)

# Session that holds the restored variables; it is left open because
# `export_model` needs it afterwards.
sess = tf.Session()

# Created after the graph has been built.
saver = tf.train.Saver()

# Restore the checkpoint; as the last expression of the cell, its
# (found, counter) result is displayed as the cell output.
_load_model(CHECKPOINT, saver, sess)

INFO:tensorflow:Setting T2TModel mode to 'eval'
INFO:tensorflow:Setting hparams.relu_dropout to 0.0
INFO:tensorflow:Setting hparams.layer_prepostprocess_dropout to 0.0
INFO:tensorflow:Setting hparams.symbol_dropout to 0.0
INFO:tensorflow:Setting hparams.attention_dropout to 0.0
INFO:tensorflow:Setting hparams.dropout to 0.0
INFO:tensorflow:Using variable initializer: uniform_unit_scaling
INFO:tensorflow:Transforming feature 'inputs' with symbol_modality_158_512.bottom
INFO:tensorflow:Transforming 'targets' with symbol_modality_158_512.targets_bottom
INFO:tensorflow:Building model body
Instructions for updating:
keep_dims is deprecated, use keepdims instead
INFO:tensorflow:Transforming body output with symbol_modality_158_512.top
Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See tf.nn.softmax_cross_entropy_with_logits_v2.

INFO:tensorflow:Beam Decoding with beam size 5
Instructions for updating

(True, 11000)

In [9]:
# Write the SavedModel into the relative directory "en". This needs the
# build/restore cell above to have been run first (it provides `sess` and the
# graph tensors).
export_model("en")

INFO:tensorflow:No assets to save.
INFO:tensorflow:No assets to write.
INFO:tensorflow:SavedModel written to: b'en/saved_model.pb'
