In [None]:
import tensorflow as tf

# Basic Gradient Tape

## Gradient Tape Usage

In [None]:
x = tf.ones(shape=(2, 2))

with tf.GradientTape() as t:
    # x is a plain tensor rather than a tf.Variable, so the tape only
    # tracks it once it is explicitly watched
    t.watch(x)

    # y: sum of every element of x (4.0 here)
    y = tf.reduce_sum(x)

    # z: y squared
    z = tf.square(y)

# dz/dx = 2 * y * dy/dx = 2 * 4 * 1 = 8 for each element of x
dz_dx = t.gradient(target=z, sources=x)
print(dz_dx)

tf.Tensor(
[[8. 8.]
 [8. 8.]], shape=(2, 2), dtype=float32)


## Gradient tape expires after one use, by default

If you want to compute multiple gradients, note that by default, GradientTape is not persistent (`persistent=False`).  This means that the GradientTape will expire after you use it to calculate a gradient.


In [None]:
x = tf.constant(3.0)

# Default tape (persistent=False): good for exactly one gradient() call
with tf.GradientTape() as t:
    # constants are not tracked automatically, so watch x explicitly
    t.watch(x)
    y = x * x  # y = x^2
    z = y * y  # z = x^4

# dz/dx = 4 * x^3, which is 108.0 at x = 3
dz_dx = t.gradient(target=z, sources=x)
print(dz_dx)

tf.Tensor(108.0, shape=(), dtype=float32)


In [None]:
# `t` was already used once (for dz/dx) in the previous cell. Because it is
# non-persistent, asking it for dy/dx now raises RuntimeError instead of
# returning the 6.0 we would otherwise expect; catch it and show the message.
try:
    dy_dx = t.gradient(target=y, sources=x)
    print(dy_dx)
except RuntimeError as err:
    print(err)

A non-persistent GradientTape can only be used tocompute one set of gradients (or jacobians)


In [None]:
# persistent=True lets the same tape serve more than one gradient() call
x = tf.constant(3.0)

with tf.GradientTape(persistent=True) as t:
    # constants are not tracked automatically, so watch x explicitly
    t.watch(x)
    y = x * x  # y = x^2
    z = y * y  # z = x^4

# First use of the tape: dz/dx = 4 * x^3 = 108.0 at x = 3
dz_dx = t.gradient(target=z, sources=x)
print(dz_dx)

tf.Tensor(108.0, shape=(), dtype=float32)


In [None]:
# Second use of the same persistent tape: dy/dx = 2 * x = 6.0 at x = 3.
# A persistent tape holds on to its recorded operations until the tape object
# is garbage collected, so run `del t` once no further gradients are needed.
t.gradient(target=y, sources=x)

<tf.Tensor: shape=(), dtype=float32, numpy=6.0>

## Nested Gradient tapes

**Acceptable indentation of the first gradient calculation**

Make sure that the first gradient calculation, `dy_dx`, occurs inside the outer `with` block (it may be inside or outside the inner one), so that the outer tape records it.

In [None]:
# x is a tf.Variable, so both tapes track it without an explicit watch()
x = tf.Variable(1.0)

with tf.GradientTape() as tape_2:
    with tf.GradientTape() as tape_1:
        y = x * x * x  # y = x^3
    # Still inside the outer block, so tape_2 records how dy_dx is computed
    dy_dx = tape_1.gradient(target=y, sources=x)
# Differentiate the recorded first derivative to get the second derivative
d2y_dx2 = tape_2.gradient(target=dy_dx, sources=x)

# Expect 3*x^2 = 3.0 and 6*x = 6.0 at x = 1
print(dy_dx, d2y_dx2, sep="\n")

tf.Tensor(3.0, shape=(), dtype=float32)
tf.Tensor(6.0, shape=(), dtype=float32)


### Where not to indent the first gradient calculation
If the first gradient calculation is OUTSIDE of the outer `with` block, the outer tape does not record it, so the second gradient calculation has nothing to differentiate through.

In [None]:
x = tf.Variable(1.0)

with tf.GradientTape() as tape_2:
    with tf.GradientTape() as tape_1:
        y = x * x * x  # y = x^3

# Both `with` blocks have already exited, so tape_2 is no longer recording
# when tape_1 computes dy/dx here. The value is still correct (3.0), but the
# operations that produced it are not on tape_2.
dy_dx = tape_1.gradient(target=y, sources=x)

# tape_2 has no recorded path from x to dy_dx, so the result is `None`
d2y_dx2 = tape_2.gradient(target=dy_dx, sources=x)

print(dy_dx, d2y_dx2, sep="\n")

tf.Tensor(3.0, shape=(), dtype=float32)
None


Notice how the `d2y_dx2` calculation is now `None`.  The outer tape stopped recording when its `with` block ended, so it never saw the operations that produced `dy_dx`.  This is why it still won't work even if you set `persistent=True` for both gradient tapes.

## Proper indentation for the second gradient calculation

The second gradient calculation `d2y_dx2` can be indented as much as the first calculation of `dy_dx` but not more.

In [None]:
# x is a tf.Variable, so both tapes track it without an explicit watch()
x = tf.Variable(1.0)

with tf.GradientTape() as tape_2:
    with tf.GradientTape() as tape_1:
        y = x * x * x  # y = x^3
        # First derivative, computed while tape_2 is still recording
        dy_dx = tape_1.gradient(target=y, sources=x)
        # this is acceptable: same indentation level as dy_dx
        d2y_dx2 = tape_2.gradient(target=dy_dx, sources=x)

# Expect 3*x^2 = 3.0 and 6*x = 6.0 at x = 1
print(dy_dx, d2y_dx2, sep="\n")

tf.Tensor(3.0, shape=(), dtype=float32)
tf.Tensor(6.0, shape=(), dtype=float32)


In [None]:
# x is a tf.Variable, so both tapes track it without an explicit watch()
x = tf.Variable(1.0)

with tf.GradientTape() as tape_2:
    with tf.GradientTape() as tape_1:
        y = x * x * x  # y = x^3
        # First derivative, computed while tape_2 is still recording
        dy_dx = tape_1.gradient(target=y, sources=x)
    # this is also acceptable: one level less indented than dy_dx
    d2y_dx2 = tape_2.gradient(target=dy_dx, sources=x)

# Expect 3*x^2 = 3.0 and 6*x = 6.0 at x = 1
print(dy_dx, d2y_dx2, sep="\n")

tf.Tensor(3.0, shape=(), dtype=float32)
tf.Tensor(6.0, shape=(), dtype=float32)


In [None]:
# x is a tf.Variable, so both tapes track it without an explicit watch()
x = tf.Variable(1.0)

with tf.GradientTape() as tape_2:
    with tf.GradientTape() as tape_1:
        y = x * x * x  # y = x^3
        # First derivative, computed while tape_2 is still recording
        dy_dx = tape_1.gradient(target=y, sources=x)
# this is also acceptable: tape_2 already recorded the dy_dx computation,
# so it can be queried after both `with` blocks have closed
d2y_dx2 = tape_2.gradient(target=dy_dx, sources=x)

# Expect 3*x^2 = 3.0 and 6*x = 6.0 at x = 1
print(dy_dx, d2y_dx2, sep="\n")

tf.Tensor(3.0, shape=(), dtype=float32)
tf.Tensor(6.0, shape=(), dtype=float32)
