In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

In [3]:
# Scalar trainable variable; the tape watches trainable variables on its own.
x = tf.Variable(initial_value=4.0)

# Every op executed inside this context is recorded during the forward pass
# so it can be differentiated afterwards.
with tf.GradientTape() as tape:
    y = x ** 2

In [4]:
y  # display the forward-pass result x**2

<tf.Tensor: shape=(), dtype=float32, numpy=16.0>

In [5]:
# dy/dx: how much y changes per unit change in x (2*x for y = x**2).
dy_dx = tape.gradient(target=y, sources=x)

dy_dx

<tf.Tensor: shape=(), dtype=float32, numpy=8.0>

In [7]:
# Fix the global seed so the random weights (and every value derived from
# them) are the same after Restart Kernel -> Run All. Re-seeding inside this
# cell also makes the cell idempotent when it is re-run on its own.
# NOTE: recorded outputs that depend on w were produced without a seed and
# will change the next time the notebook is executed.
SEED = 42
tf.random.set_seed(SEED)

# 4x2 weight matrix drawn from a standard normal distribution.
w = tf.Variable(tf.random.normal((4, 2)))

w

<tf.Variable 'Variable:0' shape=(4, 2) dtype=float32, numpy=
array([[ 0.6637846 , -1.1250737 ],
       [-0.11745531, -1.467821  ],
       [ 0.7746675 , -1.0622373 ],
       [ 0.1568039 ,  0.6385962 ]], dtype=float32)>

In [9]:
# Bias vector initialised to ones: one entry per column of w.
b = tf.Variable(tf.ones(shape=(2,), dtype=tf.float32))

b

<tf.Variable 'Variable:0' shape=(2,) dtype=float32, numpy=array([1., 1.], dtype=float32)>

In [10]:
# Single input row with four features, matching the four rows of w.
x = tf.Variable(initial_value=[[10., 20., 30., 40.]], dtype=tf.float32)

x

<tf.Variable 'Variable:0' shape=(1, 4) dtype=float32, numpy=array([[10., 20., 30., 40.]], dtype=float32)>

In [11]:
# A persistent tape keeps its recording after gradient() has been called, so
# the same forward pass can be differentiated more than once.
with tf.GradientTape(persistent=True) as tape:
    # Linear map: (1, 4) @ (4, 2) + (2,)
    y = tf.matmul(x, w) + b

    # Scalar loss: mean of the squared outputs.
    loss = tf.reduce_mean(y ** 2)

In [12]:
# Gradients of the scalar loss with respect to the weights and the bias.
d1_dw, d1_db = tape.gradient(loss, sources=[w, b])

In [17]:
d1_dw  # gradient of loss w.r.t. w; has the same (4, 2) shape as w

<tf.Tensor: shape=(4, 2), dtype=float32, numpy=
array([[  348.00922,  -459.30426],
       [  696.01843,  -918.6085 ],
       [ 1044.0277 , -1377.9128 ],
       [ 1392.0369 , -1837.217  ]], dtype=float32)>

In [18]:
loss  # scalar loss computed inside the persistent tape

<tf.Tensor: shape=(), dtype=float32, numpy=1660.3542>

In [20]:
# Second example: differentiate through a Keras layer instead of hand-built ops.
layer = tf.keras.layers.Dense(units=2, activation="relu")

# One input row with three features.
x = tf.constant(value=[[10., 20., 30.]])

In [21]:
with tf.GradientTape() as tape:
    y = layer(x)
    loss = tf.reduce_sum(y ** 2)

# One gradient per trainable variable of the layer.
# NOTE: ReLU outputs 0 for negative pre-activations. If both units are
# inactive for this input, y is all zeros and so is every gradient -- that is
# what the recorded output shows, and it depends on the layer's random
# initial weights.
grad = tape.gradient(loss, sources=layer.trainable_variables)

In [22]:
grad  # list with one gradient per entry of layer.trainable_variables

[<tf.Tensor: shape=(3, 2), dtype=float32, numpy=
 array([[0., 0.],
        [0., 0.],
        [0., 0.]], dtype=float32)>,
 <tf.Tensor: shape=(2,), dtype=float32, numpy=array([0., 0.], dtype=float32)>]

**GradientTape automatically tracks only trainable variables; non-trainable variables, constants, and plain tensors are not watched unless you call `tape.watch`.**

In [23]:
# Trainable variable (the default for tf.Variable).
x1 = tf.Variable(initial_value=5.0)

x1

<tf.Variable 'Variable:0' shape=() dtype=float32, numpy=5.0>

In [24]:
# Same value, but explicitly marked as non-trainable.
x2 = tf.Variable(initial_value=5.0, trainable=False)

x2

<tf.Variable 'Variable:0' shape=() dtype=float32, numpy=5.0>

In [25]:
# Adding two variables yields a plain tensor; this op runs outside any tape.
x3 = tf.add(x=x1, y=x2)

x3

<tf.Tensor: shape=(), dtype=float32, numpy=10.0>

In [26]:
# Constant tensor.
x4 = tf.constant(value=5.0)

x4

<tf.Tensor: shape=(), dtype=float32, numpy=5.0>

In [27]:
with tf.GradientTape() as tape:
    y = x1 ** 2 + x2 ** 2 + x3 ** 2 + x4 ** 2

# Gradient of y with respect to each of the four inputs.
grad = tape.gradient(y, sources=[x1, x2, x3, x4])

# Only x1 (trainable variable) yields a gradient, 2 * x1 = 10.0. The
# non-trainable variable x2, the precomputed tensor x3 and the constant x4
# are not watched by default, so their entries are None.
grad

[<tf.Tensor: shape=(), dtype=float32, numpy=10.0>, None, None, None]

In [28]:
# How to make the tape track constants, plain tensors, etc.
# x1 is now a constant (not watched by default); x2 is a trainable variable.
x1 = tf.constant(value=5.0)

x2 = tf.Variable(initial_value=3.0)

In [32]:
# Constants are not watched automatically; tape.watch() opts x1 in explicitly.
# A default (non-persistent) tape is enough here because gradient() is called
# exactly once, and it lets the tape release its resources right after that call.
with tf.GradientTape() as tape:
    tape.watch(x1)  # allows the constant to be tracked

    y = (x1**2) + (x2**2)

grad = tape.gradient(y, [x1, x2])

# Both gradients are available: 2 * x1 = 10.0 (watched constant) and
# 2 * x2 = 6.0 (trainable variable, watched automatically).
grad

[<tf.Tensor: shape=(), dtype=float32, numpy=10.0>,
 <tf.Tensor: shape=(), dtype=float32, numpy=6.0>]

In [33]:
# explicitly determing what var to track in gradient tape(tracks only what we watch using tape.watch)
with tf.GradientTape(watch_accessed_variables=False) as tape:
    tape.watch(x1) # tracks only x1
    
    y = (x1**2) + (x2**2)
    
grad = tape.gradient(y, [x1, x2])

grad

[<tf.Tensor: shape=(), dtype=float32, numpy=10.0>, None]

In [34]:
# Positive x selects the first branch in the control-flow cell that follows.
x = tf.constant(value=1.0)
x1 = tf.Variable(initial_value=5.0)
x2 = tf.Variable(initial_value=3.0)

In [35]:
with tf.GradientTape(persistent=True) as tape:
    tape.watch(x)

    # The tape records only the ops that actually execute, not every branch
    # of the Python control flow. x only steers the branch; result is built
    # from x1 or x2 alone.
    if x > 0.0:
        # Taken in this cell because x > 0.0.
        result = x1 ** 2
    else:
        result = x2 ** 2

dx1, dx2 = tape.gradient(result, sources=[x1, x2])

# Only the executed branch (x1) contributes a gradient; dx2 is None.
dx1, dx2

(<tf.Tensor: shape=(), dtype=float32, numpy=10.0>, None)

In [36]:
# Negative x selects the else branch in the control-flow cell that follows.
x = tf.constant(value=-1.0)
x1 = tf.Variable(initial_value=5.0)
x2 = tf.Variable(initial_value=3.0)

In [37]:
with tf.GradientTape(persistent=True) as tape:
    tape.watch(x)

    # The tape records only the ops that actually execute, not every branch
    # of the Python control flow. x only steers the branch; result is built
    # from x1 or x2 alone.
    if x > 0.0:
        result = x1 ** 2
    else:
        # Taken in this cell because x is not greater than 0.0.
        result = x2 ** 2

dx1, dx2 = tape.gradient(result, sources=[x1, x2])

# Only the executed branch (x2) contributes a gradient; dx1 is None.
dx1, dx2

(None, <tf.Tensor: shape=(), dtype=float32, numpy=6.0>)