In [1]:
# Python ≥3.5 is required
import sys
assert sys.version_info >= (3, 5)

# Scikit-Learn ≥0.20 is required
import sklearn
assert sklearn.__version__ >= "0.20"

try:
    # %tensorflow_version only exists in Colab.
    %tensorflow_version 2.x
except Exception:
    pass

# TensorFlow ≥2.4 is required in this notebook
# Earlier 2.x versions will mostly work the same, but with a few bugs
import tensorflow as tf
from tensorflow import keras

# Common imports
import numpy as np
import os

# to make this notebook's output stable across runs
np.random.seed(42)
tf.random.set_seed(42)

# To plot pretty figures
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)

# Where to save the figures
PROJECT_ROOT_DIR = "."
CHAPTER_ID = "deep"
IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, "images", CHAPTER_ID)
os.makedirs(IMAGES_PATH, exist_ok=True)

def save_fig(fig_id, tight_layout=True, fig_extension="png", resolution=300):
    path = os.path.join(IMAGES_PATH, fig_id + "." + fig_extension)
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(path, format=fig_extension, dpi=resolution)

# Computing gradient with Autodiff

In [2]:
def f(w1, w2):
    return 3 * w1 **2 + 2 * w1 * w2


In [4]:
w1, w2 = 5, 3
eps = 1e-6
(f(w1 + eps,w2) - f(w1,w2))/eps


36.000003007075065

In [6]:
(f(w1, w2 + eps) - f(w1,w2)) / eps

10.000000003174137

In [7]:
w1, w2 =tf.Variable(5.), tf.Variable(3.)
with tf.GradientTape() as tape :
    z = f(w1,w2)
gradients = tape.gradient(z,[w1,w2])

In [8]:
gradients

[<tf.Tensor: shape=(), dtype=float32, numpy=36.0>,
 <tf.Tensor: shape=(), dtype=float32, numpy=10.0>]

1. Default if want call tape is must delete 

In [9]:
# default after calls the tape the record has been del, if want call multiple 
# tape must be custom tape

# if want use the default
with tf.GradientTape() as tape:
    z = f(w1,w2)
dz_w1 = tape.gradient(z,w1)
dz_w2 = tape.gradient(z,w2)

RuntimeError: A non-persistent GradientTape can only be used to compute one set of gradients (or jacobians)

In [12]:
# if want used custom gradient
with tf.GradientTape(persistent=True)as tape:
    z = f(w1,w2)
dz_w1 = tape.gradient(z,w1)
dz_w2 = tape.gradient(z,w2)
del tape #dont be leak memory

In [13]:
dz_w1,dz_w2

(<tf.Tensor: shape=(), dtype=float32, numpy=36.0>,
 <tf.Tensor: shape=(), dtype=float32, numpy=10.0>)

2. Gradient tape only record the variable type

if want used the constant type isnt worked

-> just ill be result is none, the recorded on gradient tape isnt recored right 

In [15]:
c1,c2 = tf.constant(5.),tf.constant(3.)
with tf.GradientTape() as tape:
    z=f(c1,c2)
gradients=tape.gradient(z,[c1,c2])

In [16]:
gradients

[None, None]

solution is wused tf.watch(), thats ill be force the constant type to be readed on gradient tape

In [18]:
with tf.GradientTape() as tape:
    tape.watch(c1)
    tape.watch(c2)
    z = f(c1,c2)
gradients=tape.gradient(z,[c1,c2])

In [19]:
gradients

[<tf.Tensor: shape=(), dtype=float32, numpy=36.0>,
 <tf.Tensor: shape=(), dtype=float32, numpy=10.0>]

3. Multiple loss on the vector

isnt be multiple loss u must get the induvidual gradients on vector gradient case

So if you ever need to get the individual gradients (e.g., the gradients of each loss with regard to the model parameters), you must
call the tape’s jacobian() method

In [20]:
with tf.GradientTape() as tape:
    z1 = f(w1, w2 + 2.)
    z2 = f(w1, w2 + 5.)
    z3 = f(w1, w2 + 7.)

tape.gradient([z1, z2, z3], [w1, w2])

[<tf.Tensor: shape=(), dtype=float32, numpy=136.0>,
 <tf.Tensor: shape=(), dtype=float32, numpy=30.0>]

In [21]:
with tf.GradientTape(persistent=True) as tape:
    z1 = f(w1, w2 + 2.)
    z2 = f(w1, w2 + 5.)
    z3 = f(w1, w2 + 7.)

tf.reduce_sum(tf.stack([tape.gradient(z, [w1, w2]) for z in (z1, z2, z3)]), axis=0)
del tape

In [22]:
with tf.GradientTape(persistent=True) as hessian_tape:
    with tf.GradientTape() as jacobian_tape:
        z=f(w1,w2)
    jacobians = jacobian_tape.gradient(z,[w1,w2])
hessians = [hessian_tape.gradient(jacobians,[w1,w2])
            for jacobian in jacobians]
del hessian_tape

In [25]:
# thats the single value gradients must be converted from multiple loss 
jacobians

[<tf.Tensor: shape=(), dtype=float32, numpy=36.0>,
 <tf.Tensor: shape=(), dtype=float32, numpy=10.0>]

In [24]:
#mutliple loss -> vector loss
hessians

[[<tf.Tensor: shape=(), dtype=float32, numpy=8.0>,
  <tf.Tensor: shape=(), dtype=float32, numpy=2.0>],
 [<tf.Tensor: shape=(), dtype=float32, numpy=8.0>,
  <tf.Tensor: shape=(), dtype=float32, numpy=2.0>]]