In [1]:
import tensorflow as tf
import numpy as np

In [2]:
@tf.function
def is_phi(element):
    """Score how closely a rank-1 `element` points along the phi token.

    The phi token is the one-hot vector with its single 1 at index 0 and the
    same length as `element`. The score is the dot product between the
    L2-normalized `element` and that one-hot vector.

    Args:
        element: rank-1 tensor.

    Returns:
        Scalar tensor; 1 when `element` is a positive multiple of phi and 0
        when it is orthogonal to phi.
    """
    tf.debugging.assert_rank(element, 1)

    # Build phi with the same length as the incoming element.
    phi_token = tf.one_hot(0, tf.shape(element)[0])
    unit_element = tf.math.l2_normalize(element)

    return tf.tensordot(unit_element, phi_token, axes=1)

# Probe is_phi with three inputs: the phi one-hot itself, a one-hot that is
# orthogonal to phi, and an even mix of the two.
test1 = tf.Variable([1,0,0], dtype=tf.float32)
test2 = tf.Variable([0,1,0], dtype=tf.float32)
test3 = tf.Variable([.5,.5,0], dtype=tf.float32)

# persistent=True because the same tape is queried once per result below.
with tf.GradientTape(persistent=True) as tape:
    result1 = is_phi(test1)
    result2 = is_phi(test2)
    result3 = is_phi(test3)

# Print each score next to its gradient with respect to the input variable.
tf.print(result1, tape.gradient(result1, test1))
tf.print(result2, tape.gradient(result2, test2))
tf.print(result3, tape.gradient(result3, test3))

1 [0 0 0]
0 [1 0 0]
0.707106769 [0.707106829 -0.707106709 0]


In [3]:
from library.stacks import stack_push, stack_pop, stack_peek, new_stack, new_stack_from_buffer

In [4]:
@tf.function
def safe_push(stack, element, is_phi_fn):
    """Push `element` onto `stack`, softly skipping the push for phi elements.

    The push is always computed with `stack_push`; the returned stack is a
    linear blend of the untouched stack and the pushed stack, weighted by the
    score `is_phi_fn(element)`. A score of 1 keeps the stack as it was, a
    score of 0 yields the fully pushed stack.

    Args:
        stack: `(buffer, index)` pair; the buffer has rank >= 2, the index
            has rank 1, and `buffer.shape[1:]` equals `element.shape`.
        element: tensor to push.
        is_phi_fn: callable mapping `element` to the blend weight.

    Returns:
        `(buffer, index)` pair with the same shapes as the input stack.
    """
    tf.debugging.assert_rank_at_least(stack[0], 2)
    tf.debugging.assert_rank(stack[1], 1)
    tf.debugging.assert_equal(tf.shape(stack[0])[1:], tf.shape(element))
    tf.debugging.assert_equal(tf.rank(stack[0]) - 1, tf.rank(element) )

    keep_weight = is_phi_fn(element)
    push_weight = 1 - keep_weight

    buffer_before, index_before = stack
    buffer_pushed, index_pushed = stack_push(stack, element)

    blended_buffer = keep_weight * buffer_before + push_weight * buffer_pushed
    blended_index = keep_weight * index_before + push_weight * index_pushed

    # Hack to tell tensorflow that the shape has not changed
    # TODO: Why does this hack work?
    blended_buffer = tf.reshape(blended_buffer, tf.shape(buffer_before))
    blended_index = tf.reshape(blended_index, tf.shape(index_before))

    return (blended_buffer, blended_index)

# Start from a fresh 3x3 stack.
# NOTE(review): the meaning of the second argument (True) is defined in
# library.stacks and is not visible here — confirm.
stack = new_stack((3,3), True)
# Keep a handle on the initial stack so gradients can be taken w.r.t. it.
original_stack = stack

# element2 is an even mix of phi and a real token, so safe_push only
# partially pushes it; the other three are plain one-hot tokens.
element1 = tf.Variable([0,1,0], dtype=tf.float32)
element2 = tf.Variable([0.5,0.5,0], dtype=tf.float32)
element3 = tf.Variable([0,0,1], dtype=tf.float32)
element4 = tf.Variable([0,1,0], dtype=tf.float32)

# persistent=True because the tape is queried twice below.
with tf.GradientTape(persistent=True) as tape:
    stack = safe_push(stack, element1, is_phi)
    stack = safe_push(stack, element2, is_phi)
    stack = safe_push(stack, element3, is_phi)
    stack = safe_push(stack, element4, is_phi)
    
# Show the raw and rounded buffer and index, then the gradients of the result
# with respect to one pushed element and to the initial stack.
tf.print(stack[0])
tf.print(tf.round(stack[0]))
tf.print(stack[1])
tf.print(tf.round(stack[1]))
tf.print(tape.gradient(stack[0], element3))
tf.print(tape.gradient(stack, original_stack))

[[0 1 0]
 [0.0428932235 0.0428932235 0.707106769]
 [0 0.707106769 0.0857864469]]
[[0 1 0]
 [0 0 1]
 [0 1 0]]
[0.707106769 0.292893231 0]
[1 0 0]
[0.0606601834 0.792893231 0.792893231]
([[0 0 0]
 [0.207106784 0.207106784 0.207106784]
 [0.207106799 0.207106799 0.207106799]], [2.87867975 1.53553391 1.76776707])


In [5]:
@tf.function
def pop_and_purge(stack):
    """Pop the top element and overwrite the slot it occupied with phi.

    After the pop, a phi token (one-hot at index 0) is pushed and immediately
    popped again, which leaves phi written in the buffer slot the popped
    element came from.

    Args:
        stack: `(buffer, index)` pair; the phi token is sized from the
            buffer's second axis.

    Returns:
        `(stack, element)`: the stack after the pop/purge and the element
        that was popped first.
    """
    element_dim = tf.shape(stack[0])[1]
    phi_token = tf.one_hot(0, element_dim, dtype=tf.float32)

    remaining, element = stack_pop(stack)

    # Push phi and pop it straight away to purge the freed slot.
    remaining = stack_push(remaining, phi_token)
    remaining, _ = stack_pop(remaining)

    return remaining, element

# Build a stack whose buffer is all ones so the purged slot is easy to spot
# in the printed result.
stack = new_stack_from_buffer(tf.ones((3,3), dtype=tf.float32))

# The tape records the call, but no gradient is requested in this cell.
with tf.GradientTape(persistent=True) as tape:
    stack, element = pop_and_purge(stack)
    
# Print the stack after the pop/purge and the element that was popped.
tf.print(stack)
tf.print(element)

([[1 1 1]
 [1 1 1]
 [1 0 0]], [0 0 1])
[1 1 1]


In [6]:
# Same experiment as the earlier safe_push cell, except that element2 is
# exactly the phi one-hot, so its push is blended away completely.
stack = new_stack((3,3), True)

element1 = tf.Variable([0,1,0], dtype=tf.float32)
element2 = tf.Variable([1,0,0], dtype=tf.float32)
element3 = tf.Variable([0,0,1], dtype=tf.float32)
element4 = tf.Variable([0,1,0], dtype=tf.float32)

# Keep a handle on the initial stack so gradients can be taken w.r.t. it.
original_stack = stack

# persistent=True because the tape is queried twice below.
with tf.GradientTape(persistent=True) as tape:
    stack = safe_push(stack, element1, is_phi)
    stack = safe_push(stack, element2, is_phi)
    stack = safe_push(stack, element3, is_phi)
    stack = safe_push(stack, element4, is_phi)
    
# Print the final buffer and index, then the gradients of the result with
# respect to one pushed element and to the initial stack.
tf.print(stack[0])
tf.print(stack[1])
tf.print(tape.gradient(stack[0], element3))
tf.print(tape.gradient(stack, original_stack))

[[0 1 0]
 [0 0 1]
 [0 1 0]]
[1 0 0]
[-1 1 1]
([[0 0 0]
 [0 0 0]
 [0 0 0]], [4 1 1])


In [7]:
from library.array_ops import tensor_lookup_2d

In [8]:
# Sizes of the token vocabulary, the production set, and the stack buffers.
TOKEN_DIM, PRODUCTION_DIM, STACK_SIZE = 6, 4, 10

# One-hot token vectors: row k of the identity matrix encodes token k.
PHI, S, O, T, X, PLUS = np.eye(TOKEN_DIM)[:6]

# Grammar entry made of three phi tokens.
E = [PHI] * 3

# Grammar tables, indexed as [production][token]: one row per production
# (PRODUCTION_DIM rows) and one column per token (TOKEN_DIM columns, in the
# order PHI, S, O, T, X, PLUS). Every entry is a list of three token vectors;
# E (three phi tokens) is used for all unused entries.

# G_s: tokens pushed back onto the stack for a (production, top-of-stack) pair.
G_s = tf.constant([
    [E, E, E, E, E, E],
    [E, E, E, E, E, E],
    [E, [S, O, T], E, E, E, E],
    [E, [T, PHI, PHI], E, E, E, E],
], dtype=tf.float32)
# G_o: tokens pushed to the output for a (production, top-of-stack) pair.
G_o = tf.constant([
    [E, E, E, [X, PHI, PHI], E, E],
    [E, E, [PLUS, PHI, PHI], E, E, E],
    [E, E, E, E, E, E],
    [E, E, E, E, E, E],
], dtype=tf.float32)
# Bundle both tables in the (stack grammar, output grammar) order that
# production_step unpacks.
grammar = (G_s, G_o)

In [9]:
def tokens_pretty_print(tokens):
    """Render a sequence of token vectors as a space-separated symbol string.

    Each row of `tokens` is mapped to the symbol of its largest component
    (`tf.argmax` along axis 1). Every symbol is followed by one space, so a
    non-empty result ends with a trailing space.

    Args:
        tokens: rank-2 tensor with one token vector per row.

    Returns:
        A Python string such as `'_ S O T x + '`.
    """
    symbols = ['_', 'S', 'O', 'T', 'x', '+']
    token_ids = tf.argmax(tokens, axis=1)

    return ''.join(f'{symbols[token_id]} ' for token_id in token_ids)

# Sanity check: feed one one-hot vector per token id and display the rendered
# symbols (the transpose of this square one-hot matrix equals the matrix).
tokens = tf.transpose(tf.one_hot([0,1,2,3,4,5], TOKEN_DIM, dtype=tf.float32))
tokens_pretty_print(tokens)

'_ S O T x + '

In [10]:
@tf.function
def production_step(grammar, production, stack, output, is_phi_fn):
    """Apply one production to the (stack, output) state.

    The top token is popped from `stack` (and its slot purged). The pair
    (`production`, popped token) is then looked up with `tensor_lookup_2d` in
    both grammar tables: the tokens from the stack table are pushed back onto
    `stack`, and the tokens from the output table are pushed onto `output`.
    All pushes go through `safe_push`, so phi tokens are softly skipped.

    Args:
        grammar: pair `(G_s, G_o)` of rank-4 tensors (stack table, output
            table).
        production: rank-1 tensor selecting the production.
        stack: `(buffer, index)` pair with a rank-2 buffer.
        output: `(buffer, index)` pair with a rank-2 buffer.
        is_phi_fn: callable forwarded to `safe_push` to score phi-ness.

    Returns:
        `(stack, output)` after the step.
    """
    tf.debugging.assert_rank(grammar[0], 4)
    tf.debugging.assert_rank(grammar[1], 4)
    tf.debugging.assert_rank(production, 1)
    tf.debugging.assert_rank(stack[0], 2)
    tf.debugging.assert_rank(output[0], 2)

    G_s, G_o = grammar

    # Get next token from stack
    stack, stack_top_token = pop_and_purge(stack)

    # Push tokens back onto the stack. They are pushed in reverse order so
    # that the first token of the looked-up entry ends up on top.
    tokens_to_push = tensor_lookup_2d(G_s, production, stack_top_token)
    for token in tf.reverse(tokens_to_push, axis=[0]):
        stack = safe_push(stack, token, is_phi_fn)

    # Push tokens to output, in the order given by the grammar entry.
    tokens_to_push = tensor_lookup_2d(G_o, production, stack_top_token)
    for token in tokens_to_push:
        output = safe_push(output, token, is_phi_fn)

    return stack, output

# Fresh stack and output buffers, each STACK_SIZE slots of TOKEN_DIM-wide tokens.
stack = new_stack(((STACK_SIZE, TOKEN_DIM)))
output = new_stack(((STACK_SIZE, TOKEN_DIM)))

# Seed the stack with the start symbol S and select production 2.
stack = safe_push(stack, tf.constant(S, dtype=tf.float32), is_phi)
production = tf.one_hot(2, PRODUCTION_DIM)

with tf.GradientTape(persistent = True) as tape:
    # Explicitly watch every input so gradients can be requested for any of them.
    tape.watch(grammar)
    tape.watch(production)
    tape.watch(stack)
    tape.watch(output)
    
    new_s, new_o = production_step(grammar, production, stack, output, is_phi)

# Show the resulting stack contents as symbols.
tf.print(tokens_pretty_print(new_s[0]))
# Other gradient queries, currently disabled:
# tf.print(tape.gradient(new_o, output))
# tf.print(tape.gradient(new_s, stack))
# tf.print(tape.gradient(new_s[0], grammar[0]).shape)
# tf.print(tape.gradient(new_s[1], grammar[0]).shape)
# tf.print(tape.gradient(new_o[0], grammar[1]).shape)
# tf.print(tape.gradient(new_o[1], grammar[1]).shape)
# Gradient of the new stack with respect to the production selector.
tf.print(tape.gradient(new_s, production))

Instructions for updating:
This op will be removed after the deprecation date. Please switch to tf.sets.difference().
T O S _ _ _ _ _ _ _ 
[-0.0833333358 -0.0833333358 3 -0.0416666679]


In [11]:
# Ask TensorFlow not to run tf.function-decorated functions eagerly.
# NOTE(review): newer TensorFlow releases expose this setting as
# tf.config.run_functions_eagerly — confirm against the installed version.
tf.config.experimental_run_functions_eagerly(False)

In [12]:
# @tf.function
def generate(grammar, productions, stack_shape, S, is_phi_fn):
    """Run a sequence of productions from start token `S`, tracing each step.

    A fresh stack and output buffer are allocated, `S` is pushed onto the
    stack, and `production_step` is applied once per entry of `productions`.
    Before and after each step the production id, the selected grammar
    entries, and the stack/output contents are printed with `tf.print`.

    Args:
        grammar: pair `(G_s, G_o)` of grammar tables indexed as
            `[production][token]`.
        productions: iterable of rank-1 production selectors.
        stack_shape: shape passed to `new_stack` for both stack and output.
        S: start token pushed onto the stack before the first step.
        is_phi_fn: callable used for every soft push, including the initial
            push of `S`.

    Returns:
        The output `(buffer, index)` pair after all productions were applied.
    """
    # Use the grammar that was passed in, not the notebook-level globals.
    grammar_s, grammar_o = grammar

    # Reserve space for stack and output
    stack = new_stack(stack_shape)
    output = new_stack(stack_shape)

    # Push S to top of stack
    stack = safe_push(stack, S, is_phi_fn)

    for production in productions:
        top = stack_peek(stack)

        # Snapshot the stack before the step so it can be printed afterwards.
        before = tokens_pretty_print(stack[0]), tf.argmax(stack[1])
        i = tf.argmax(production)
        j = tf.argmax(top)
        tf.print('p\t', i)
        tf.print('G_s\t', tokens_pretty_print(grammar_s[i][j]), (i,j))
        tf.print('G_o\t', tokens_pretty_print(grammar_o[i][j]), (i,j))

        stack, output = production_step(grammar, production, stack, output, is_phi_fn)

        tf.print('S_i\t', before[0], before[1])
        tf.print('S_i+1\t', tokens_pretty_print(stack[0]), tf.argmax(stack[1]))
        tf.print('O_i+1\t', tokens_pretty_print(output[0]), tf.argmax(output[1]))

    return output

# Production ids to apply, in order, encoded as one-hot selectors.
productions = tf.one_hot([2, 3, 0, 1, 0], PRODUCTION_DIM)

stack_shape = (STACK_SIZE, TOKEN_DIM)
# Start symbol as a float32 tensor.
d_S = tf.constant(S, dtype=tf.float32)

# The tape records the run, but no gradient is requested in this cell.
with tf.GradientTape(persistent = True) as tape:
    output = generate(grammar, productions, stack_shape, d_S, is_phi)
    
# tf.print(output)

p	 2
G_s	 S O T  (2, 1)
G_o	 _ _ _  (2, 1)
S_i	 S _ _ _ _ _ _ _ _ _  1
S_i+1	 T O S _ _ _ _ _ _ _  3
O_i+1	 _ _ _ _ _ _ _ _ _ _  0
p	 3
G_s	 T _ _  (3, 1)
G_o	 _ _ _  (3, 1)
S_i	 T O S _ _ _ _ _ _ _  3
S_i+1	 T O T _ _ _ _ _ _ _  3
O_i+1	 _ _ _ _ _ _ _ _ _ _  0
p	 0
G_s	 _ _ _  (0, 3)
G_o	 x _ _  (0, 3)
S_i	 T O T _ _ _ _ _ _ _  3
S_i+1	 T O _ _ _ _ _ _ _ _  2
O_i+1	 x _ _ _ _ _ _ _ _ _  1
p	 1
G_s	 _ _ _  (1, 2)
G_o	 + _ _  (1, 2)
S_i	 T O _ _ _ _ _ _ _ _  2
S_i+1	 T _ _ _ _ _ _ _ _ _  1
O_i+1	 x + _ _ _ _ _ _ _ _  2
p	 0
G_s	 _ _ _  (0, 3)
G_o	 x _ _  (0, 3)
S_i	 T _ _ _ _ _ _ _ _ _  1
S_i+1	 _ _ _ _ _ _ _ _ _ _  0
O_i+1	 x + x _ _ _ _ _ _ _  3
