In [1]:
import numpy as np # conda install numpy
import tensorflow as tf

In [2]:
tf.__version__  # TensorFlow release this notebook was executed with: '2.3.1'

'2.3.1'

## 一階導數

In [3]:
# First derivative of y = x^2: dy/dx = 2x, which is 10.0 at x = 5.0.
x = tf.constant(5.0)
with tf.GradientTape() as g:
    # x is a constant tensor, so the tape is told explicitly to track it.
    g.watch(x)
    y = x * x
dy_dx = g.gradient(y, x)  # 2 * x = 10.0 at x = 5.0
# This tape is non-persistent: calling g.gradient(y, x) a second time raises
# "RuntimeError: GradientTape.gradient can only be called once on
# non-persistent tapes."
dy_dx.numpy()

10.0

## 二階導數

In [4]:
# Second derivative of y = x^2 using nested tapes: the inner tape yields
# dy/dx, and the outer tape differentiates that result once more.
x = tf.constant(3.0)
with tf.GradientTape() as outer_tape:
    outer_tape.watch(x)
    with tf.GradientTape() as inner_tape:
        inner_tape.watch(x)
        y = x * x
    # Computed inside the outer context so the outer tape records it.
    dy_dx = inner_tape.gradient(y, x)  # 2 * x = 6.0 at x = 3.0
d2y_dx2 = outer_tape.gradient(dy_dx, x)  # d(2x)/dx = 2.0

(dy_dx.numpy(), d2y_dx2.numpy())

(6.0, 2.0)

In [7]:
# A persistent tape may be queried several times. With the default
# (non-persistent) tape the second gradient() call below would raise
# RuntimeError. x is a constant here, hence the explicit watch().
x = tf.constant(3.0)
with tf.GradientTape(persistent=True) as tape:
    tape.watch(x)
    y = x * x  # y = x^2
    z = y * y  # z = x^4
dy_dx = tape.gradient(y, x)  # 2 * x = 6.0 at x = 3.0
dz_dx = tape.gradient(z, x)  # 4 * x^3 = 108.0 at x = 3.0
del tape  # drop the reference to the persistent tape once finished

(dy_dx.numpy(), dz_dx.numpy())

(6.0, 108.0)

## 多變數偏微分

In [8]:
# Linear model y_p = W @ X + b evaluated on one 2x1 input column.
# X is a constant (the input); W and b are Variables (the parameters).
X = tf.constant([[2.0], [3.0]])
W = tf.Variable([[4.0, 2.0], [2.0, 1.0]])
b = tf.Variable([[5.0], [2.0]])
# [[4*2 + 2*3 + 5], [2*2 + 1*3 + 2]] = [[19.], [9.]]
y_p = tf.matmul(W, X) + b
y_p.numpy()

array([[19.],
       [ 9.]], dtype=float32)

In [9]:
def loss(y, y_pred):
    """Return the mean squared error between targets and predictions.

    Args:
        y: Target values, elementwise-compatible with ``y_pred``.
        y_pred: Predicted values.

    Returns:
        The mean of the squared elementwise differences ``y - y_pred``,
        reduced over all elements.
    """
    residual = y - y_pred
    return tf.reduce_mean(tf.square(residual))

# Targets for the two model outputs. float32 is requested explicitly: NumPy
# would otherwise create a float64 array while the model tensors are float32,
# leaving the subtraction inside loss() to rely on implicit dtype conversion.
y = np.array([[20.0], [7.0]], dtype=np.float32)
current_loss = loss(y, y_p)  # mean([(20 - 19)^2, (7 - 9)^2]) = 2.5
current_loss.numpy()

2.5

In [10]:
# Gradients of the raw prediction W @ X + b. The target is not a scalar, so
# the tape returns the gradient of the sum of its elements:
#   dW = [[1], [1]] @ X^T,  db = [[1], [1]],  dX = W^T @ [[1], [1]].
# persistent=True allows the two separate gradient() calls below; X is a
# constant and therefore has to be watched explicitly.
with tf.GradientTape(persistent=True) as tape:
    tape.watch(X)
    y_p2 = tf.matmul(W, X) + b
param_grads = tape.gradient(y_p2, [W, b])
dW, db = param_grads
dX = tape.gradient(y_p2, X)
del tape
print(f'dW:\n{dW.numpy()}\ndb:\n{db.numpy()}')
print(f'dX:\n{dX.numpy()}')

dW:
[[2. 3.]
 [2. 3.]]
db:
[[1.]
 [1.]]
dX:
[[6.]
 [3.]]


In [11]:
# Gradients of the mean-squared-error loss, requesting W and b together by
# passing a list of sources. With g = y_p - y = [[-1], [2]]:
#   dW = g @ X^T,  db = g,  dX = W^T @ g.
# dX comes out as exactly zero only because of these particular values:
# [[4, 2], [2, 1]] @ [[-1], [2]] = [[0], [0]].
with tf.GradientTape(persistent=True) as tape:
    tape.watch(X)
    prediction = tf.matmul(W, X) + b
    current_loss = loss(y, prediction)
param_grads = tape.gradient(current_loss, [W, b])
dW, db = param_grads
dX = tape.gradient(current_loss, X)
del tape
print(f'dW:\n{dW.numpy()}\ndb:\n{db.numpy()}')
print(f'dX:\n{dX.numpy()}')

dW:
[[-2. -3.]
 [ 4.  6.]]
db:
[[-1.]
 [ 2.]]
dX:
[[0.]
 [0.]]


In [14]:
# Gradients of the mean-squared-error loss, this time with one gradient()
# call per source (W, then b, then X). Repeated calls on the same tape are
# only allowed because it was created with persistent=True.
with tf.GradientTape(persistent=True) as tape:
    tape.watch(X)
    prediction = tf.matmul(W, X) + b
    current_loss = loss(y, prediction)
dW, db, dX = [tape.gradient(current_loss, source) for source in (W, b, X)]
del tape
print(f'dW:\n{dW.numpy()}\ndb:\n{db.numpy()}')
print(f'dX:\n{dX.numpy()}')

dW:
[[-2. -3.]
 [ 4.  6.]]
db:
[[-1.]
 [ 2.]]
dX:
[[0.]
 [0.]]
