In [1]:
import tensorflow as tf

In [2]:
def foo2(x, y):
    a = x * x
    b = x * x * x
    
    return y * b + (1 - y) * a

x = tf.Variable(2.0)
y = tf.Variable(0.1)

with tf.GradientTape(persistent=True) as tape:
    z = foo2(x, y)

print(z)
print(tape.gradient(z, x))
print(tape.gradient(z, y))

tf.Tensor(4.4, shape=(), dtype=float32)
tf.Tensor(4.8, shape=(), dtype=float32)
tf.Tensor(4.0, shape=(), dtype=float32)


In [3]:
@tf.custom_gradient
def foo(x, y):
    def grad(g):
        if y > 0.5:
            dy_dx = 2 * x
        else:
            dy_dx = 3 * x * x

        return dy_dx * g, g
    if y > 0.5:
        return x * x, grad
    else:
        return x * x * x, grad

x = tf.Variable(2.0)
y = tf.Variable(0.6)

with tf.GradientTape(persistent=True) as tape:
    z = foo(x, y)

print(tape.gradient(z, x))
print(tape.gradient(z, y))

tf.Tensor(4.0, shape=(), dtype=float32)
tf.Tensor(1.0, shape=(), dtype=float32)


In [4]:
@tf.function
def to_prob_dist(v):
    v2 = tf.sqrt(tf.square(v)+1e-9)
    # v2 = tf.sqrt(tf.square(v))
    m = tf.expand_dims(tf.reduce_sum(v2, axis=-1),-1)
    n = tf.math.divide_no_nan(v2, m)
    return n

tf.print(to_prob_dist([1.0, 0.0]))
tf.print(to_prob_dist([0.1, 0.0]))
tf.print(to_prob_dist([0.1, 0.1]))
tf.print(to_prob_dist([78.1, 21.1]))
tf.print(to_prob_dist([2.0, 1.0]))
x = tf.Variable([
        [3.0, 1.0],
        [7.0, 1.0],
        [1.0, 1.0],
        [1.0, 0.0],
    ])

with tf.GradientTape() as tape:
    z = to_prob_dist(x)
    loss = tf.nn.l2_loss(z)
print('-'*80)
tf.print(z)
tf.print(tape.gradient(loss, x))
# tf.print(tape.gradient(z, x))

[0.99996841 3.16217775e-05]
[0.999683917 0.000316127785]
[0.5 0.5]
[0.787298381 0.212701619]
[0.666666687 0.333333343]
--------------------------------------------------------------------------------
[[0.75 0.25]
 [0.875 0.125]
 [0.5 0.5]
 [0.99996841 3.16217775e-05]]
[[0.03125 -0.09375]
 [0.0117187509 -0.08203125]
 [0 0]
 [3.15904617e-05 -0]]


In [5]:
@tf.function
def to_prob_dist_all(v):
    v2 = tf.sqrt(tf.square(v)+1e-9)
    # v2 = tf.sqrt(tf.square(v))
    m = tf.expand_dims(tf.reduce_sum(v2),-1)
    n = tf.math.divide_no_nan(v2, m)
    return n

tf.print(to_prob_dist([1.0, 0.0]))
tf.print(to_prob_dist([0.1, 0.0]))
tf.print(to_prob_dist([0.1, 0.1]))
tf.print(to_prob_dist([78.1, 21.1]))
tf.print(to_prob_dist([2.0, 1.0]))
x = tf.Variable([
        [3.0, 1.0],
        [7.0, 1.0],
        [1.0, 1.0],
        [1.0, 0.0],
    ])

with tf.GradientTape() as tape:
    z = to_prob_dist_all(x)
    loss = tf.nn.l2_loss(z)
print('-'*80)
tf.print(z)
tf.print(tape.gradient(loss, x))
# tf.print(tape.gradient(z, x))

[0.99996841 3.16217775e-05]
[0.999683917 0.000316127785]
[0.5 0.5]
[0.787298381 0.212701619]
[0.666666687 0.333333343]
--------------------------------------------------------------------------------
[[0.199999586 0.0666665286]
 [0.466665685 0.0666665286]
 [0.0666665286 0.0666665286]
 [0.0666665286 2.10818075e-06]]
[[-0.00533327274 -0.0142221246]
 [0.01244443 -0.0142221246]
 [-0.0142221246 -0.0142221246]
 [-0.0142221246 -0]]


In [6]:
def foo3(x, y, op):
    a = x + y
    b = x - y

    op = to_prob_dist(op)
    res = tf.stack([a, b])
    c = tf.tensordot(res, op, 1)

    return c

x = tf.Variable(3.0)
y = tf.Variable(2.0)
op = tf.Variable([0.5, 0.5])

with tf.GradientTape(persistent=True) as tape:
    z = foo3(x, y, op)
    loss = tf.nn.l2_loss(z - 1.0)

tf.print(z, loss)
print(tape.gradient(loss, x))
print(tape.gradient(loss, y))
print(tape.gradient(loss, op))

3 2
tf.Tensor(2.0, shape=(), dtype=float32)
tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor([ 4. -4.], shape=(2,), dtype=float32)


In [7]:
opt = tf.keras.optimizers.SGD(1e-1)
# opt = tf.keras.optimizers.Adam(3e-4)
target = tf.constant(1.0)

x = tf.Variable(3.0)
y = tf.Variable(2.0)
op = tf.Variable([0.5, 1.0])

for i in range(100):
    with tf.GradientTape() as tape:
        z = foo3(x, y, op)
        loss = tf.nn.l2_loss(z - target)
    variables = [x, y, op]
    grads = tape.gradient(loss,variables)
    opt.apply_gradients(zip(grads, variables))

    op.assign(to_prob_dist(op))

    if i % 10 == 0:
        tf.print(loss, x, op, y, z)


0.888889074 2.86666656 [0.190348491 0.809651494] 2.04444456 2.33333349
7.91181665e-05 2.8198216 [0.061551556 0.938448429] 2.06532216 0.987420797
5.18369419e-08 2.81931615 [0.0597038157 0.940296173] 2.06576061 0.999678
3.18962634e-11 2.81930304 [0.0596567206 0.940343261] 2.06577206 0.999992
1.77635684e-15 2.81930304 [0.0596555285 0.940344512] 2.06577253 0.99999994
7.10542736e-15 2.81930304 [0.0596555136 0.940344512] 2.06577253 1.00000012
7.10542736e-15 2.81930304 [0.0596555434 0.940344512] 2.06577253 0.999999881
0 2.81930304 [0.0596555285 0.940344512] 2.06577253 1
7.10542736e-15 2.81930304 [0.0596555136 0.940344512] 2.06577253 1.00000012
7.10542736e-15 2.81930304 [0.0596555434 0.940344512] 2.06577253 0.999999881


In [8]:
@tf.function
def dot(a, b):
    return tf.reduce_sum(tf.multiply(a, b), axis=-1)

a = [2.0, 3.0]
b = [[0.0, 1.0],
 [0.0, 1.0]]
tf.print(dot(a,b))
tf.print(dot([2.0, 3.0], [0.5, 0.5]))

[3 3]
2.5


In [9]:
@tf.function
def foo5(v, s, op):
    s = to_prob_dist(s)
    op = to_prob_dist(op)

    xy = dot(v,s)
    x = xy[0]
    y = xy[1]

    a = x + y
    b = x - y
    res = tf.stack([a, b])
    c = dot(res, op)

    return c

v = tf.constant([2.0, 3.0])
s = tf.Variable([
    [1.0, 0.0],
    [0.0, 1.0],
])
op = tf.Variable([0.0, 1.0])
target = tf.constant(1.0)

with tf.GradientTape(persistent=True) as tape:
    z = foo5(v, s, op)
    loss = tf.nn.l2_loss(z - target)

tf.print(z, loss)
# print(tape.gradient(z, s))
print(tape.gradient(loss, s))
print(tape.gradient(loss, op))

-0.999746859 1.9994936
tf.Tensor(
[[6.341934e-05 0.000000e+00]
 [0.000000e+00 6.341934e-05]], shape=(2, 2), dtype=float32)
tf.Tensor([0.         0.00037932], shape=(2,), dtype=float32)


In [10]:
v = tf.constant([2.0, 3.0])
s = tf.Variable([
    [0.5, 0.5],
    [0.5, 0.5],
])
# s = tf.Variable([
#     [0.0, 1.0],
#     [1.0, 0.0],
# ])
op = tf.Variable([0.5, 0.5])
target = tf.constant(1.0)

with tf.GradientTape(persistent=True) as tape:
    z = foo5(v, s, op)
    loss = tf.nn.l2_loss(z - target)

tf.print(z, loss)
# print(tape.gradient(z, s))
print(tape.gradient(loss, s))
print(tape.gradient(loss, op))

2.5 1.125
tf.Tensor(
[[-0.75  0.75]
 [ 0.    0.  ]], shape=(2, 2), dtype=float32)
tf.Tensor([ 3.75 -3.75], shape=(2,), dtype=float32)


In [11]:
def collapse_prob(v):
    v = tf.convert_to_tensor(v, dtype=tf.float32)

    if tf.rank(v) == 1:
        v = tf.expand_dims(v, 0)
    arr = []
    for vv in v:
        a = tf.argmax(vv)
        s = tf.shape(vv)[0]
        arr.append(tf.eye(s, dtype=tf.float32)[a])
    return tf.squeeze(tf.stack(arr))

tf.print(collapse_prob([0.4,0.6]))
tf.print(collapse_prob(tf.Variable([
    [0.1, 0.2],
    [0.2, 0.1],
])))

[0 1]
[[0 1]
 [1 0]]


In [12]:
opt = tf.keras.optimizers.SGD(1e-2)
# opt = tf.keras.optimizers.Adam(3e-4)

v = tf.constant([2.0, 3.0])
s = tf.Variable([
    [0.5, 0.5],
    [0.5, 0.5],
])
op = tf.Variable([0.5, 0.5])
target = tf.constant(1.0)

for i in range(1000):
    op.assign(to_prob_dist(op))
    s.assign(to_prob_dist(s))

    with tf.GradientTape() as tape:
        z = foo5(v, s, op)
        loss = tf.nn.l2_loss(z - target)
    variables = [s, op]
    grads = tape.gradient(loss,variables)
    opt.apply_gradients(zip(grads, variables))

    if i % 100 == 0:
        c_s = collapse_prob(s)
        c_op = collapse_prob(op)
        xy = tf.tensordot(v, c_s, 1)
        c_z = foo5(v, c_s, c_op)
        tf.print(loss, z, c_z, xy[0], xy[1], c_op)

1.125 2.5 0.000126489118 5 0 [0 1]
7.10542736e-15 1.00000012 -0.999746859 2 3 [0 1]
7.10542736e-15 1.00000012 -0.999746859 2 3 [0 1]
7.10542736e-15 1.00000012 -0.999746859 2 3 [0 1]
7.10542736e-15 1.00000012 -0.999746859 2 3 [0 1]
7.10542736e-15 1.00000012 -0.999746859 2 3 [0 1]
7.10542736e-15 1.00000012 -0.999746859 2 3 [0 1]
7.10542736e-15 1.00000012 -0.999746859 2 3 [0 1]
7.10542736e-15 1.00000012 -0.999746859 2 3 [0 1]
7.10542736e-15 1.00000012 -0.999746859 2 3 [0 1]


In [13]:
def gen_ops_matrix(v):
    v_s = tf.shape(v)
    v_t = tf.tile(v, [v_s[0]])
    v_t = tf.reshape(v_t, [v_s[0],-1])

    op1 = tf.expand_dims(v_t + tf.expand_dims(v,-1), 0)
    op2 = tf.expand_dims(v_t - tf.expand_dims(v,-1), 0)
    ops = tf.concat([op1, op2], axis=0)

    return ops

In [14]:
@tf.function
def foo6(a, b, op, ops):
    s = tf.tensordot(a, b, 0)
    s = to_prob_dist_all(s)
    op = to_prob_dist_all(op)

    pair_choose = tf.reduce_sum(ops * s, axis=[1,2])
    op_choose = tf.reduce_sum(pair_choose * op)

    return op_choose

v = tf.constant([2.0, 3.0])
a = tf.Variable([1.0, 0.0])
b = tf.Variable([1.0, 1.0])
op = tf.Variable([0.0, 1.0])
target = tf.constant(1.0)
ops = gen_ops_matrix(v)

with tf.GradientTape(persistent=True) as tape:
    z = foo6(a, b, op, ops)
    loss = tf.nn.l2_loss(z - target)

tf.print(z, loss)
# print(tape.gradient(z, s))
tf.print(tape.gradient(loss, [a, b]))
tf.print(tape.gradient(loss, op))

0.500094891 0.124952562
[[-1.57877803e-05 0], [0.124964438 -0.124980226]]
[0 6.32405281e-05]


In [15]:
opt = tf.keras.optimizers.SGD(1e-2)
# opt = tf.keras.optimizers.Adam(3e-4)

v = tf.constant([2.0, 3.0])
a = tf.Variable([0.5, 0.5])
b = tf.Variable([0.5, 0.5])
op = tf.Variable([0.5, 0.5])
target = tf.constant(1.0)
ops = gen_ops_matrix(v)

for i in range(100):
    with tf.GradientTape() as tape:
        z = foo6(a, b, op, ops)
        loss = tf.nn.l2_loss(z - target)

    variables = [a, b, op]
    grads = tape.gradient(loss, variables)
    opt.apply_gradients(zip(grads, variables))
    op.assign(to_prob_dist_all(op))
    a.assign(to_prob_dist_all(a))
    b.assign(to_prob_dist_all(b))

    if i % 10 == 0:
        tf.print(loss, z, tf.round(a*100), tf.round(b*100), tf.round(op*100))


1.125 2.5 [50 50] [51 49] [46 54]
0.0523815304 1.32367122 [49 51] [54 46] [26 74]
0.0013753731 1.05244756 [49 51] [55 45] [22 78]
3.04003443e-05 1.00779748 [49 51] [55 45] [21 79]
6.52791414e-07 1.00114262 [49 51] [55 45] [21 79]
1.39664564e-08 1.00016713 [49 51] [55 45] [21 79]
2.98605585e-10 1.00002444 [49 51] [55 45] [21 79]
6.39488462e-12 1.00000358 [49 51] [55 45] [21 79]
2.55795385e-13 1.00000072 [49 51] [55 45] [21 79]
7.10542736e-15 1.00000012 [49 51] [55 45] [21 79]
