In [1]:
import os
# Set before TensorFlow is imported so the setting is in place when the library
# loads; level '2' is intended to hide INFO and WARNING messages from TF's native logging.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import numpy as np
import tensorflow as tf

import matplotlib.pyplot as plt

## Example 1

In [2]:
# Differentiate y = x^2 at x = 3 using a gradient tape.
x = tf.Variable(3.0)

with tf.GradientTape() as tape:
    # Operations on a trainable variable are recorded by the tape.
    y = tf.pow(x, 2)

# dy/dx = 2x
dy_dx = tape.gradient(target=y, sources=x)
print(dy_dx.numpy())

6.0


## Example 2
Notes: the <b>persistent=True</b> argument to GradientTape lets you call `gradient()` multiple times on the same tape; without it, the resources held by the tape are released after the first call.

In [3]:
# Parameters of one linear layer mapping 3 inputs to 2 outputs.
w = tf.Variable(tf.random.normal(shape=(3, 2)), name='w')
b = tf.Variable(tf.zeros([2], dtype=tf.float32), name='b')
x = [[3.0, 2.0, 1.0]]  # a single sample, shape (1, 3)

with tf.GradientTape() as tape:
    y = tf.matmul(x, w) + b
    l = tf.reduce_mean(tf.pow(y, 2))

# A list of sources yields the gradients in the same order.
dl_dw, dl_db = tape.gradient(target=l, sources=[w, b])
print(dl_dw, dl_db)

tf.Tensor(
[[-5.9031854  -2.102612  ]
 [-3.935457   -1.4017413 ]
 [-1.9677285  -0.70087063]], shape=(3, 2), dtype=float32) tf.Tensor([-1.9677285  -0.70087063], shape=(2,), dtype=float32)


In [4]:
# Sources may also be supplied as a dictionary keyed by name.
w = tf.Variable(tf.random.normal(shape=(3, 2)))
b = tf.Variable(tf.zeros([2], dtype=tf.float32))
x = [[3.0, 2.0, 1.0]]  # a single sample, shape (1, 3)

with tf.GradientTape(persistent=True) as tape:
    y = tf.matmul(x, w) + b
    l = tf.reduce_mean(tf.pow(y, 2))

my_vars = dict(w=w, b=b)
grads = tape.gradient(target=l, sources=my_vars)
print(grads)  # the result is a dictionary with the same keys as my_vars

{'w': <tf.Tensor: shape=(3, 2), dtype=float32, numpy=
array([[13.088335 , -2.3886986],
       [ 8.725556 , -1.5924658],
       [ 4.362778 , -0.7962329]], dtype=float32)>, 'b': <tf.Tensor: shape=(2,), dtype=float32, numpy=array([ 4.362778 , -0.7962329], dtype=float32)>}


## Example 3: Gradients with respect to a model

In [5]:
# A single dense layer with two sigmoid outputs serves as the model.
model = tf.keras.layers.Dense(2, activation='sigmoid')
x = tf.constant([[1, 3, 2]], dtype=tf.float32)  # one sample, three features
y = tf.constant([[1, 0]], dtype=tf.float32)     # target for each output unit

with tf.GradientTape(persistent=True) as tape:
    y_hat = model(x)
    # Binary cross-entropy for every output unit.
    loss = -y * tf.math.log(y_hat) - (1 - y) * tf.math.log(1 - y_hat)
    # Sum across the output units first, then across the batch.
    loss = tf.reduce_sum(tf.reduce_sum(loss, axis=1), axis=0)

# One gradient per trainable variable of the layer (kernel, then bias).
grads = tape.gradient(target=loss, sources=model.trainable_variables)
print(grads)

[<tf.Tensor: shape=(3, 2), dtype=float32, numpy=
array([[-0.09957576,  0.13712737],
       [-0.29872727,  0.4113821 ],
       [-0.19915152,  0.27425474]], dtype=float32)>, <tf.Tensor: shape=(2,), dtype=float32, numpy=array([-0.09957576,  0.13712737], dtype=float32)>]


In [6]:
# Report each trainable variable's name next to the shape of its gradient.
for idx, var in enumerate(model.trainable_variables):
    g = grads[idx]
    print(f'{var.name}, shape: {g.shape}')

dense/kernel:0, shape: (3, 2)
dense/bias:0, shape: (2,)


### Notes
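By default, a gradient tape only tracks trainable `tf.Variable`s. The code below does not raise an error, but the gradients for x1, x2 and x3 come back as `None`: x1 is a variable that is not trainable, x2 is a tensor (a variable plus a constant yields a tensor), and x3 is a constant.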

In [7]:
# Four candidate sources; only the first is a trainable variable.
x0 = tf.Variable(3.0, name='x0')                   # trainable variable
x1 = tf.Variable(3.0, name='x1', trainable=False)  # variable, but not trainable
x2 = tf.Variable(2.0, name='x2') + 1.0             # variable + constant gives a tensor
x3 = tf.constant(3.0, name='x3')                   # constant tensor

with tf.GradientTape(persistent=True) as tape:
    y = tf.pow(x0, 2) + tf.pow(x1, 2) + tf.pow(x2, 2) + tf.pow(x3, 2)

grads = tape.gradient(target=y, sources=[x0, x1, x2, x3])

# Print one gradient per line, in the order of the sources.
for grad in grads:
    print(grad)

tf.Tensor(6.0, shape=(), dtype=float32)
None
None
None


In [8]:
# List the variables that the tape is watching.
watched = tape.watched_variables()
watched

(<tf.Variable 'x0:0' shape=() dtype=float32, numpy=3.0>,)

### Notes
GradientTape does not watch plain tensors by default, but you can force it to watch one by calling `tape.watch()` on that tensor inside the tape's context.

In [9]:
# A constant tensor used as the differentiation source.
x = tf.constant(3.0, dtype=tf.float32)

with tf.GradientTape(persistent=True) as tape:
    tape.watch(x)  # explicitly ask the tape to track this tensor
    y = tf.pow(x, 2)

grad = tape.gradient(target=y, sources=x)
print(grad)

tf.Tensor(6.0, shape=(), dtype=float32)


Conversely, to disable the default behavior of watching all `tf.Variables`, set `watch_accessed_variables=False` when creating the gradient tape. This calculation uses two variables, but only connects the gradient for one of the variables:

In [10]:
# Two variables take part in the computation, but only x1 is watched.
x0 = tf.Variable(0.2, dtype='float32')
x1 = tf.Variable(0.5, dtype='float32')

# watch_accessed_variables=False turns off the automatic tracking of variables,
# so only what is passed to tape.watch() is tracked.
with tf.GradientTape(watch_accessed_variables=False, persistent=True) as tape:
    tape.watch(x1)
    y0 = tf.math.sin(x0)
    y1 = tf.math.tanh(x1)
    ys = y0 + y1

grads = tape.gradient(ys, {'x0': x0, 'x1': x1})

# Iterate over key/value pairs so the loop names say what they hold; the
# gradient for the unwatched x0 prints as None.
for var_name, var_grad in grads.items():
    print(var_name, var_grad)

x0 None
x1 tf.Tensor(0.7864477, shape=(), dtype=float32)
