In [1]:
import tensorflow as tf

In [12]:
def foo2(x, y):
    """Blend the square and the cube of `x`, using `y` as the mixing weight.

    The result is ``y * x**3 + (1 - y) * x**2``. Only ``*``, ``+`` and ``-``
    are applied to the arguments, so anything supporting those operators is
    accepted (the notebook passes tf.Variable scalars).
    """
    square = x * x
    cube = x * x * x

    # y weights the cube, its complement (1 - y) weights the square.
    return y * cube + (1 - y) * square

# Evaluate foo2 at x = 2.0, y = 0.1 and differentiate it w.r.t. both inputs.
x, y = tf.Variable(2.0), tf.Variable(0.1)

# persistent=True: the tape is queried twice below (once per variable).
with tf.GradientTape(persistent=True) as tape:
    z = foo2(x, y)

# One value per line: z, dz/dx, dz/dy.
print(z, tape.gradient(z, x), tape.gradient(z, y), sep="\n")

tf.Tensor(4.4, shape=(), dtype=float32)
tf.Tensor(4.8, shape=(), dtype=float32)
tf.Tensor(4.0, shape=(), dtype=float32)


In [27]:
@tf.custom_gradient
def foo(x, y):
    """Return ``x * x`` when ``y > 0.5`` and ``x * x * x`` otherwise.

    The derivative with respect to `x` is supplied by hand instead of being
    traced. Both the branch decision and the local derivative are computed
    once, during the forward pass, and the backward closure only reuses them.
    The gradient therefore always belongs to the branch (and the value of `x`)
    that produced the output, even if `x` or `y` is a tf.Variable that gets
    reassigned before ``tape.gradient`` is called.

    Args:
        x: value to square or cube.
        y: scalar selector compared against 0.5. The Python ``if`` needs a
            concrete truth value for the comparison, as in eager execution.

    Returns:
        ``(output, grad)`` as expected by ``tf.custom_gradient``, where
        ``grad(g)`` returns ``(g * d(output)/dx, g)``.
    """
    # Decide the branch exactly once; output and derivative stay paired.
    if y > 0.5:
        out, dout_dx = x * x, 2 * x
    else:
        out, dout_dx = x * x * x, 3 * x * x

    def grad(g):
        # NOTE(review): the slot for `y` passes the upstream gradient through
        # unchanged. The output is piecewise constant in `y`, so its true
        # derivative is 0 away from the 0.5 threshold. Kept as-is to preserve
        # the notebook's recorded result; confirm this is intentional.
        return dout_dx * g, g

    return out, grad

# y = 0.6 is above the 0.5 threshold, so foo takes its x * x branch here.
x, y = tf.Variable(2.0), tf.Variable(0.6)

# persistent=True: the tape is queried twice below (once per variable).
with tf.GradientTape(persistent=True) as tape:
    z = foo(x, y)

# One value per line: dz/dx, then the value foo's custom gradient reports for y.
print(tape.gradient(z, x), tape.gradient(z, y), sep="\n")

tf.Tensor(4.0, shape=(), dtype=float32)
tf.Tensor(1.0, shape=(), dtype=float32)


In [221]:
def to_prob_dist(v, eps=1e-9):
    """Turn `v` into non-negative weights that sum to 1 along the last axis.

    Every entry is replaced by ``sqrt(v**2 + eps)``, a smooth stand-in for
    ``|v|``, and then divided by the sum of those magnitudes over the last
    axis.

    Args:
        v: tensor-like input with at least one axis (the notebook passes
            Python lists and tf.Variables).
        eps: constant added under the square root. The default, 1e-9, is the
            value this function previously hard-coded. A positive `eps` keeps
            the square root away from 0, where its derivative is unbounded.
            It also means an exact zero maps to ``sqrt(eps)`` rather than 0,
            so such entries keep a small positive weight.

    Returns:
        Tensor with the same shape as `v`.
    """
    magnitude = tf.sqrt(tf.square(v) + eps)
    # keepdims=True leaves a size-1 last axis so the division broadcasts per row.
    total = tf.reduce_sum(magnitude, axis=-1, keepdims=True)
    # divide_no_nan returns 0 rather than NaN wherever the total is 0
    # (e.g. an all-zero row when eps == 0).
    return tf.math.divide_no_nan(magnitude, total)

# Spot-check the normalisation on a handful of 2-element vectors.
for sample in (
    [1.0, 0.0],
    [0.1, 0.0],
    [0.1, 0.1],
    [78.1, 21.1],
    [2.0, 1.0],
):
    tf.print(to_prob_dist(sample))

# Batched input: each row is normalised on its own (last-axis reduction).
x = tf.Variable([[3.0, 1.0], [7.0, 1.0], [1.0, 1.0], [1.0, 0.0]])

with tf.GradientTape() as tape:
    z = to_prob_dist(x)
    loss = tf.nn.l2_loss(z)

print('-'*80)
tf.print(z)
tf.print(tape.gradient(loss, x))
# Disabled alternative (this tape is not persistent, so only one gradient
# query can be made on it):
# tf.print(tape.gradient(z, x))

[0.99996841 3.16217775e-05]
[0.999683917 0.000316127785]
[0.5 0.5]
[0.787298381 0.212701619]
[0.666666687 0.333333343]
--------------------------------------------------------------------------------
[[0.75 0.25]
 [0.875 0.125]
 [0.5 0.5]
 [0.99996841 3.16217775e-05]]
[[0.03125 -0.09375]
 [0.0117187509 -0.08203125]
 [0 0]
 [3.15904617e-05 -0]]


In [97]:
def foo3(x, y, op):
    """Mix ``x + y`` and ``x - y`` using weights derived from `op`.

    `op` is first passed through ``to_prob_dist``. The two candidate results
    are then stacked and contracted with those weights, giving
    ``w[0] * (x + y) + w[1] * (x - y)`` for scalar `x`, `y` and a 2-element
    `op`, as used in this notebook.
    """
    weights = to_prob_dist(op)
    candidates = tf.stack([x + y, x - y])

    # axes=1 contracts the stacked axis against the weight vector.
    return tf.tensordot(candidates, weights, 1)

# Equal raw weights for the "+" and "-" branches of foo3.
x, y = tf.Variable(3.0), tf.Variable(2.0)
op = tf.Variable([0.5, 0.5])

# persistent=True: three separate gradient queries follow.
with tf.GradientTape(persistent=True) as tape:
    z = foo3(x, y, op)
    loss = tf.nn.l2_loss(z - 1.0)

tf.print(z, loss)
# One gradient per line: d(loss)/dx, d(loss)/dy, d(loss)/d(op).
print(
    tape.gradient(loss, x),
    tape.gradient(loss, y),
    tape.gradient(loss, op),
    sep="\n",
)

3 2
tf.Tensor(2.0, shape=(), dtype=float32)
tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor([ 4. -4.], shape=(2,), dtype=float32)


In [100]:
# Plain SGD with step size 0.1; Adam(3e-4) was tried as an alternative.
opt = tf.keras.optimizers.SGD(1e-1)
# opt = tf.keras.optimizers.Adam(3e-4)
target = tf.constant(1.0)

x, y = tf.Variable(3.0), tf.Variable(2.0)
op = tf.Variable([0.5, 1.0])

# The trainable set never changes, so build the list once up front.
variables = [x, y, op]

for i in range(100):
    with tf.GradientTape() as tape:
        z = foo3(x, y, op)
        loss = tf.nn.l2_loss(z - target)

    grads = tape.gradient(loss, variables)
    opt.apply_gradients(zip(grads, variables))

    # After each step, overwrite op with its normalised form.
    op.assign(to_prob_dist(op))

    # Report every 10th iteration.
    if i % 10 == 0:
        tf.print(loss, x, op, y, z)


0.888889074 2.86666656 [0.190348491 0.809651494] 2.04444456 2.33333349
7.91234197e-05 2.8198216 [0.061551623 0.93844837] 2.06532216 0.98742038
5.19137373e-08 2.81931663 [0.0597037375 0.940296173] 2.06576061 0.999677777
3.18962634e-11 2.81930351 [0.0596566834 0.94034338] 2.06577229 0.999992
0 2.81930351 [0.059655454 0.940344572] 2.06577277 1
0 2.81930351 [0.059655454 0.940344572] 2.06577277 1
0 2.81930351 [0.059655454 0.940344572] 2.06577277 1
0 2.81930351 [0.059655454 0.940344572] 2.06577277 1
0 2.81930351 [0.059655454 0.940344572] 2.06577277 1
0 2.81930351 [0.059655454 0.940344572] 2.06577277 1


In [222]:
def foo5(v, s, op):
    """Pick two operands out of `v` via `s`, then mix their sum and difference.

    `s` and `op` are both normalised with ``to_prob_dist`` (tf.nn.softmax was
    an alternative that is currently disabled). `v` is contracted with the
    normalised `s`; the first two entries of that result act as the operands.
    Their sum and difference are stacked and contracted with the normalised
    `op`.
    """
    # Disabled alternative normalisation:
    # s = tf.nn.softmax(s)
    # op = tf.nn.softmax(op)
    selector = to_prob_dist(s)
    weights = to_prob_dist(op)

    operands = tf.tensordot(v, selector, 1)
    lhs, rhs = operands[0], operands[1]

    candidates = tf.stack([lhs + rhs, lhs - rhs])
    return tf.tensordot(candidates, weights, 1)

# Identity selector and all raw weight on the second ("-") candidate.
v = tf.constant([2.0, 3.0])
s = tf.Variable([[1.0, 0.0], [0.0, 1.0]])
op = tf.Variable([0.0, 1.0])
target = tf.constant(1.0)

# persistent=True: the tape is queried more than once below.
with tf.GradientTape(persistent=True) as tape:
    z = foo5(v, s, op)
    loss = tf.nn.l2_loss(z - target)

tf.print(z, loss)
# Disabled: print(tape.gradient(z, s))
# One gradient per line: d(loss)/ds, then d(loss)/d(op).
print(tape.gradient(loss, s), tape.gradient(loss, op), sep="\n")

-0.999747097 1.99949408
tf.Tensor(
[[-0.00025272  0.        ]
 [-0.          0.00037956]], shape=(2, 2), dtype=float32)
tf.Tensor([-0.          0.00037932], shape=(2,), dtype=float32)
