# Create Your Own Visualizations!
Instructions:
1. Install tensor2tensor and train a Transformer model by following the instructions in the repository https://github.com/tensorflow/tensor2tensor.
2. Update cell 3 to point to your checkpoint. It is currently set up to read from the default checkpoint location that would be created by following the instructions above.
3. If you used custom hyperparameters, update cell 4.
4. Run the notebook!

In [1]:
import os

import tensorflow as tf
import numpy as np

from tensor2tensor import problems
from tensor2tensor.bin import t2t_decoder  # To register the hparams set
from tensor2tensor.utils import registry
from tensor2tensor.utils import trainer_lib
from tensor2tensor.visualization import attention
from tensor2tensor.visualization import visualization
from tensor2tensor.utils import usr_dir
from itertools import groupby

  from ._conv import register_converters as _register_converters


In [2]:
%%javascript
// Maps the module name "d3" to the d3 3.4.8 build hosted on cdnjs (protocol-relative URL).
require.config({
  paths: {
      d3: '//cdnjs.cloudflare.com/ajax/libs/d3/3.4.8/d3.min'
  }
});

<IPython.core.display.Javascript object>

## HParams

In [3]:
# Directory of the trained checkpoint to load; replace it with the model you want to visualize.
CHECKPOINT = "../checkpoints/word_to_phonetic/transformer-transformer_base_single_gpu-fr-best_model"

In [4]:
# HParams: problem, data directory, model and hyperparameter set used to build the visualizer.
# Update these if the model was trained with custom hyperparameters.
# NOTE(review): the CHECKPOINT directory name mentions "transformer_base_single_gpu"
# while hparams_set is "transformer_base" - confirm this is the intended set.
problem_name = 'word_to_phonetic'
data_dir = "../data_dir/fr"
model_name = "transformer"
hparams_set = "transformer_base"

In [5]:
#Imports the user module directory so that the word_to_phonetic problem is added to the problem list
submodule_dir = "../submodule"
usr_dir.import_usr_dir(submodule_dir)

INFO:tensorflow:Importing user module submodule from path /home/olivier/Bureau/Transformer_test


## Visualization

In [6]:
#Builds the attention visualizer from the settings above, decoding with a beam size of 5
visualizer = visualization.AttentionVisualizer(hparams_set, model_name, data_dir, problem_name, beam_size=5)

INFO:tensorflow:Setting T2TModel mode to 'eval'
INFO:tensorflow:Setting hparams.symbol_dropout to 0.0
INFO:tensorflow:Setting hparams.relu_dropout to 0.0
INFO:tensorflow:Setting hparams.dropout to 0.0
INFO:tensorflow:Setting hparams.attention_dropout to 0.0
INFO:tensorflow:Setting hparams.layer_prepostprocess_dropout to 0.0
INFO:tensorflow:Using variable initializer: uniform_unit_scaling
INFO:tensorflow:Transforming feature 'inputs' with symbol_modality_258_512.bottom
INFO:tensorflow:Transforming 'targets' with symbol_modality_258_512.targets_bottom
INFO:tensorflow:Building model body
Instructions for updating:
keep_dims is deprecated, use keepdims instead
INFO:tensorflow:Transforming body output with symbol_modality_258_512.top
Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See tf.nn.softmax_cross_entropy_with_logits_v2.

INFO:tensorflow:Beam Decoding with beam size 5
Instructions for updating

In [7]:
#Creates a non-trainable int64 variable named 'global_step' before opening the session
#NOTE(review): presumably needed so the checkpoint's global_step can be restored - confirm
tf.Variable(0, dtype=tf.int64, trainable=False, name='global_step')

#Opens a session that restores the parameters found in CHECKPOINT
#NOTE(review): save_summaries_secs=0 presumably turns off periodic summary saving; the log
#also shows a CheckpointSaverHook being created, so this session may write into
#CHECKPOINT - confirm that this is acceptable for a visualization-only notebook
sess = tf.train.MonitoredTrainingSession(
    checkpoint_dir=CHECKPOINT,
    save_summaries_secs=0,
)

INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Restoring parameters from ../checkpoints/word_to_phonetic/transformer-transformer_base_single_gpu-fr-best_model/model.ckpt-9001


In [22]:
#Input example
input_word = "test"
_, inp_text, out_text, att_mats = visualizer.get_vis_data_from_string(sess, input_word)
#Decodes the tokens as Latin-1 because of French and Spanish special characters
inp_text = [str(c, 'Latin-1') for c in inp_text] 
out_text = [str(c, 'Latin-1') for c in out_text]

#Uncomment to see all attention heads and have a better understanding of what's happening inside the model
#attention.show(inp_text, out_text, *att_mats)

#Removes the "end of sequence" markers from both texts, and the padding markers from the output
inp_text = [v for v in inp_text if v != '<EOS>']
out_text = [v for v in out_text if v != '<EOS>' and v != '<pad>']

#Keeps indices 0, 4 and 5 along the first axis of the "inp_out" attention matrices
#(the layer axis, according to the original comment, which only mentioned layers 0 and 4)
att_matrices = np.array(attention._get_attention(inp_text, out_text, *att_mats)["inp_out"]["att"])[np.array([0,4,5]),:,:,:]

In [23]:
#Encodes the joined output text with visualizer.encode; the cell output shows the resulting array
visualizer.encode("".join(out_text))

array([[[[118]],

        [[ 71]],

        [[117]],

        [[118]],

        [[  1]]]])

In [9]:
def normalize(matrix):
    """
        Standardizes a matrix using its global mean and standard deviation.

        input: a numpy matrix
        return: matrix with 0 mean and 1 std; when every entry is identical
                (std of 0) the centered, all-zero matrix is returned instead
                of dividing by zero and producing NaNs
    """
    centered = matrix - np.mean(matrix)
    spread = np.std(matrix)
    if spread == 0:
        #A constant matrix has no spread to rescale: the centered values are already all 0
        return centered
    return centered / spread

#Sums over axis 1 of att_matrices (the attention heads, according to the original comment)
sum_all_head = np.sum(att_matrices, axis=1)

#Sums over axis 0 (the layers kept in att_matrices), crops the result to
#len(out_text) rows by len(inp_text) columns and normalizes it
sum_all_layers = normalize(np.sum(sum_all_head,axis=0)[:len(out_text), :len(inp_text)])

In [10]:
#Base threshold (multiplier of the row standard deviation used in the silent letter test)
#fr : 0.75
#es : 0.4
if len(out_text) > 4:
    threshold = 0.75
else : 
    threshold = 0

#Lowers the threshold until at most a third of the input letters are flagged as silent
while(True):
    #Gets the silent_letters indices
    #For each input letter i, idx is the output row holding its highest attention value.
    #The letter is considered silent if that value is below
    #mean(row idx) + threshold * std(row idx)
    silent_letters_idx = [i for i, idx in enumerate(np.argmax(sum_all_layers, axis = 0)) 
                          if sum_all_layers[idx, i] < np.mean(sum_all_layers[idx,:]) 
                          + threshold*np.std(sum_all_layers[idx,:])]
    #Reduces threshold if too many silent letters are detected
    #Can happen in French when we have 3-letter graphemes
    if len(silent_letters_idx) > 1/3 * len(inp_text):
        threshold -= 0.1
    else:
        break

#Creates the phoneme attribution list: each input letter gets the output token of its highest attention row
phon_list = np.array(out_text)[np.argmax(sum_all_layers, axis = 0)]
phon_list[silent_letters_idx] = "#" #"#" is our encoding for silent letters
phon_list = phon_list.tolist() #needed for the += just below

#Checks that every phoneme is attributed to at least one letter
#If not, the phoneme is appended to the entry of the letter with the highest attention for it
#(this can happen for small datasets)
for i, phon in enumerate(out_text):
    if phon not in phon_list:
        phon_list[np.argmax(sum_all_layers[i,:])] += phon
    
#Disabled attempt at replacing duplicated phonemes:
#     test = np.where(np.array(phon_list) == phon)[0]
#     if len(test > 1):
#         phon_list[np.max(test)] = "%"

##NOTE: the original author flagged this cell as NOT WORKING PROPERLY

#Creates the g_p tuple list: one (letter, phoneme) pair per input letter
g_p = [(l, phon_list[i]) for i, l in enumerate(inp_text)]

#Creates the final g_p mapping: consecutive letters sharing the same phoneme
#are merged into one "letters-phoneme" string
mapping = []
for phon, letters in groupby(g_p, lambda x: x[1]):
    graph = "".join([letter[0] for letter in letters])
    mapping.append(graph + "-" + phon)
print(mapping)

['hu-y', 'l-l', 'u-y', 'l-l', 'a-E', 'i-#', 'e-E', 'nt-#']


In [11]:
def recurent_check(normalized_value_row, idx_row, idx_last_phon, out_text, j):
    """
        Picks the phoneme to attribute to one input letter.

        Candidates are examined from position j onwards. A candidate is accepted
        when its value is above 0.75 * std of the whole row and its phoneme index
        is either 0 or not lower than idx_last_phon. The search stops at the
        first candidate whose value is not above that cutoff.

        input: normalized_value_row, the attention values of the letter
               idx_row, the phoneme index matching each value
               idx_last_phon, the index of the last attributed phoneme
               out_text, the sequence of phonemes
               j, the position of the first candidate to examine
        return: (phoneme index, phoneme), or (-1, "$") when no candidate is
                accepted, including when the candidates are exhausted (the
                previous recursive version raised an IndexError in that case)
    """
    spread = np.std(normalized_value_row)
    cutoff = 0.75 * spread
    #Never reads past the end of either row
    last = min(len(normalized_value_row), len(idx_row))
    for pos in range(j, last):
        #Diagnostic output, emitted once per examined candidate as before
        print(normalized_value_row, spread)
        if not normalized_value_row[pos] > cutoff:
            return -1, "$"
        candidate = idx_row[pos]
        if candidate == 0 or candidate >= idx_last_phon:
            return candidate, out_text[candidate]
    return -1, "$"


#Phoneme attributed to each input letter, and index of the last attributed phoneme
phon_list = []
idx_last_phon = 0

#For each input letter (one row after the transpose): the output indices and the
#attention values, both sorted by decreasing attention
idx_sorted_phon_per_letter = np.argsort(-sum_all_layers, axis=0)[:,:].transpose()
value_sorted_phon_per_letter = -np.sort(-sum_all_layers, axis=0)[:,:].transpose()

for i, idx in enumerate(idx_sorted_phon_per_letter[:,0]):
    normalized_value_row = value_sorted_phon_per_letter[i,:]
    idx_row = idx_sorted_phon_per_letter[i,:]
    
    #Picks the phoneme of letter i, starting from its best candidate
    idx_phon, phon = recurent_check(normalized_value_row, idx_row, idx_last_phon, out_text, 0)
    
    phon_list.append(phon)
    #NOTE(review): idx_last_phon becomes -1 after a "$" result, so the
    #"index >= idx_last_phon" test of recurent_check accepts any index for the
    #next letter - confirm this is intended
    idx_last_phon = idx_phon
    
#Creates the g_p tuple list: one (letter, phoneme) pair per input letter
g_p = [(l, phon_list[i]) for i, l in enumerate(inp_text)]

#Creates the final g_p mapping: consecutive letters sharing the same phoneme
#are merged into one "letters-phoneme" string
mapping = []
for phon, letters in groupby(g_p, lambda x: x[1]):
    graph = "".join([letter[0] for letter in letters])
    mapping.append(graph + "-" + phon)
print(mapping)

[ 1.06525102 -0.31792047 -0.36558086 -0.46200795 -0.66098948] 0.6180536102139619
[ 1.8992929   0.69367388 -0.41562538 -0.63287608 -0.66053605] 0.9941644975265498
[ 3.0639282   0.43804662 -0.10846462 -0.36167091 -0.66071512] 1.344188215406739
[ 3.3779973   0.40099458  0.10375085 -0.32120024 -0.65302955] 1.4436172856362397
[ 2.85270214  0.20574147 -0.07796076 -0.22529724 -0.32006145] 1.1961228347479336
[ 2.27176321 -0.32876776 -0.54840714 -0.66066426 -0.66095732] 1.1350791797926991
[ 0.501696   -0.29333061 -0.55300606 -0.58932894 -0.63060675] 0.4240167590987156
[ 0.59766129 -0.2974547  -0.6410225  -0.64869399 -0.65555726] 0.4828867736564058
[ 0.05114794 -0.38156322 -0.64586064 -0.64863634 -0.6608068 ] 0.2748945166468728
[-0.29617655 -0.29777911 -0.56485272 -0.62207582 -0.65416272] 0.15777707881038736
['hu-y', 'l-l', 'u-y', 'l-l', 'aie-E', 'nt-$']


In [12]:
#(letter, phoneme) pairs, filled phoneme by phoneme
g_p = []
#Starting threshold, as a multiplier of the standard deviation of the phoneme's attention row
base_threshold = 2

#For each output phoneme (one row): the input letter indices and the attention
#values, both sorted by decreasing attention
idx_sorted_letter_per_phon = np.argsort(-sum_all_layers, axis=1)[:,:]
value_sorted_letter_per_phon = -np.sort(-sum_all_layers, axis=1)[:,:]

for i, phon in enumerate(out_text):
    normalized_value_row = value_sorted_letter_per_phon[i,:]
    idx_row = idx_sorted_letter_per_phon[i,:]
    threshold = base_threshold
    #Lowers the threshold by 0.1 until at least one letter is selected for this phoneme
    while(True):
        letters = np.array(inp_text)[idx_row[np.where(normalized_value_row > threshold * np.std(normalized_value_row))]]
        if len(letters) == 0:
            threshold -= 0.1
        else:
            break
    for l in letters:
        g_p.append((l, phon))
        
#Walks the input letters in order and adds a (letter, "$") pair wherever g_p
#does not hold that letter at the same position
for i, l in enumerate(inp_text):
    if i == len(g_p):
        g_p.append((l, "$"))
    elif g_p[i][0] != l:
        g_p.insert(i, (l, "$"))

#Creates the final g_p mapping: consecutive pairs sharing the same phoneme
#are merged into one "letters-phoneme" string
mapping = []
for phon, letters in groupby(g_p, lambda x: x[1]):
    graph = "".join([letter[0] for letter in letters])
    mapping.append(graph + "-" + phon)
print(mapping)

['h-$', 'u-y', 'l-l', 'u-y', 'l-l', 'a-E', 'ient-$']


In [13]:
#Phoneme(s) attributed to each input letter
phon_list = []
#For each input letter (one row after the transpose): the output indices and the
#attention values, both sorted by decreasing attention
idx_sorted_phon_per_letter = np.argsort(-sum_all_layers, axis=0)[:,:].transpose()
value_sorted_phon_per_letter = -np.sort(-sum_all_layers, axis=0)[:,:].transpose()

for i, idx in enumerate(idx_sorted_phon_per_letter[:,0]):
    normalized_value_row = value_sorted_phon_per_letter[i,:]
    idx_row = idx_sorted_phon_per_letter[i,:]
    
    #Keeps every phoneme whose value for this letter is above 1.2 * std of the letter's values
    phon = np.array(out_text)[idx_row[np.where(normalized_value_row > 1.2 * np.std(normalized_value_row))]]
    if len(phon) > 0:
        phon_list.append("".join(phon))
    else:
        #"$" marks a letter for which no phoneme passes the test
        phon_list.append("$")

#Creates the g_p tuple list: one (letter, phoneme) pair per input letter
g_p = [(l, phon_list[i]) for i, l in enumerate(inp_text)]

#Creates the final g_p mapping: consecutive letters sharing the same phoneme
#are merged into one "letters-phoneme" string
mapping = []
for phon, letters in groupby(g_p, lambda x: x[1]):
    graph = "".join([letter[0] for letter in letters])
    mapping.append(graph + "-" + phon)
print(mapping)

['hu-y', 'l-l', 'u-y', 'l-l', 'a-E', 'i-$', 'e-E', 'nt-$']


In [14]:
def get_gp_match(i, txt, pred, attentionMatrix, treshold = 0.5):
    """
        Builds the list of "grapheme-phoneme" strings for entry i.

        input: i, the index of the entry to process in txt, pred and attentionMatrix
               txt, indexable collection whose entry i holds the input letters
               pred, indexable collection whose entry i holds the predicted phonemes
               attentionMatrix, indexable collection whose entry i is read as one row
               per predicted phoneme and one column per input letter; rows must
               provide argmax() (presumably numpy arrays - confirm against the caller)
               treshold, the attention value separating the letters that belong to a phoneme
        return: a list of "letters-phoneme" strings, empty when txt[i] equals ['']
                or when none of the three cases below applies

        NOTE(review): shift() is called below but is not defined in the visible
        part of this file - confirm where it comes from.
    """
    temp = []
    #Position of the next input letter that has not been consumed yet
    previous = 0
    if txt[i] != ['']:
        #Several letters and several phonemes: splits the letters between the phonemes
        if len(txt[i]) > 1 and len(pred[i]) > 1:
            for j in range(len(pred[i])):
                graph = ""
                if j+1 < len(attentionMatrix[i]):            
                    #A following attention row exists: its argmax bounds the letters of phoneme j
                    end = attentionMatrix[i][j+1].argmax()
                    #Consumes letters while we are before that bound (or on the argmax of
                    #row j) and the letter is either below the threshold for the next
                    #phoneme or above it for the current one
                    while ((previous < end or previous == attentionMatrix[i][j].argmax())
                           and (attentionMatrix[i][j+1][previous] < treshold or attentionMatrix[i][j][previous] > treshold)):

                        graph += txt[i][previous]
                        previous +=1

                else:           
                    #No following attention row: consumes the letters above the threshold
                    #or located 1 to 3 positions after the argmax of row j
                    while (previous < len(attentionMatrix[i][j]) 
                           and (attentionMatrix[i][j][previous] > treshold
                           or previous == attentionMatrix[i][j].argmax()+1
                           or previous == attentionMatrix[i][j].argmax()+2
                           or previous == attentionMatrix[i][j].argmax()+3)):
                        graph += txt[i][previous]
                        previous +=1                      
                g = graph
                p = pred[i][j]
                gp = ("-").join([g,p])
                temp.append(gp)

                #No letter was attributed to this phoneme: delegates the repair to shift()
                if g == "":
                    shift(temp,j)

        #Single letter: it is paired with the first phoneme
        elif len(txt[i]) == 1:
            g = txt[i][0]
            p = pred[i][0]
            gp = ("-").join([g,p])
            temp.append(gp)

        #Single phoneme: all the letters are joined and paired with it
        elif len(pred[i]) == 1:
            g = ("").join(txt[i])
            p = pred[i][0]
            gp = ("-").join([g,p])
            temp.append(gp)

    return temp