In [1]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.layers import Dense

In [2]:
# Scalar variable; operations applied to it inside the tape are recorded
# so that a gradient can be requested afterwards.
x = tf.Variable(4.0)
with tf.GradientTape() as tape:
    y = x**2  # forward pass: y = x^2

In [3]:
y  # forward value: 4.0**2 = 16.0

<tf.Tensor: shape=(), dtype=float32, numpy=16.0>

In [4]:
# Differentiate y = x**2 with respect to x: dy/dx = 2x, i.e. 8.0 at x = 4.0.
dy_dx = tape.gradient(y, x)
dy_dx

<tf.Tensor: shape=(), dtype=float32, numpy=8.0>

In [5]:
# Seed TensorFlow's global RNG so the random weights (and every gradient
# derived from them) are identical on each Restart & Run All.
# Outputs captured before this seed was added will differ until the
# notebook is re-executed.
tf.random.set_seed(42)

# Weight matrix of shape (4, 2) drawn from a standard normal distribution.
w = tf.Variable(tf.random.normal(shape=[4, 2]))
w

<tf.Variable 'Variable:0' shape=(4, 2) dtype=float32, numpy=
array([[-1.2218944 ,  0.9965784 ],
       [-0.53399515,  1.356813  ],
       [ 1.3740774 ,  1.5375139 ],
       [ 0.20006336, -0.87719804]], dtype=float32)>

In [6]:
# Bias vector of length 2 (one entry per column of w), initialised to 1.0.
b = tf.Variable(tf.ones(shape=[2], dtype=tf.float32))
b

<tf.Variable 'Variable:0' shape=(2,) dtype=float32, numpy=array([1., 1.], dtype=float32)>

In [7]:
# Single input row of shape (1, 4), so that x @ w is well defined for the
# (4, 2) weight matrix.
x = tf.Variable([[10.0, 10.0, 30.0, 40.0]], dtype=tf.float32)
x

<tf.Variable 'Variable:0' shape=(1, 4) dtype=float32, numpy=array([[10., 10., 30., 40.]], dtype=float32)>

In [8]:
# persistent=True allows tape.gradient() to be called more than once on
# this tape (a non-persistent tape supports a single call only).
with tf.GradientTape(persistent=True) as tape:
    y = x @ w + b                 # linear model: (1, 4) @ (4, 2) + (2,)
    loss = tf.reduce_mean(y**2)   # scalar loss: mean of squared outputs

In [9]:
# Request both gradients in one call; the results come back in the same
# order as the list of sources.
dl_dw, dl_db = tape.gradient(loss, [w, b])

In [10]:
dl_dw  # d(loss)/dw, same shape as w: (4, 2)

<tf.Tensor: shape=(4, 2), dtype=float32, numpy=
array([[ 326.6596 ,  355.71405],
       [ 326.6596 ,  355.71405],
       [ 979.9789 , 1067.1421 ],
       [1306.6384 , 1422.8562 ]], dtype=float32)>

In [11]:
dl_db  # d(loss)/db, same shape as b: (2,)

<tf.Tensor: shape=(2,), dtype=float32, numpy=array([32.665962, 35.571404], dtype=float32)>

In [13]:
# Fully connected layer with 2 output units and a ReLU activation.
layer = Dense(units=2, activation='relu')

# One input sample with 3 features, shape (1, 3).
x = tf.constant([[10., 20., 30.]])

In [14]:
# Call the layer inside the tape so the operations on its weights are recorded.
with tf.GradientTape() as tape:
    y = layer(x)
    loss = tf.reduce_sum(y**2)  # scalar loss: sum of squared activations

# One gradient per entry of layer.trainable_variables, in the same order.
grad = tape.gradient(loss, layer.trainable_variables)

In [15]:
grad  # list of gradients, one per trainable variable of the layer

[<tf.Tensor: shape=(3, 2), dtype=float32, numpy=
 array([[   0.     ,  441.28546],
        [   0.     ,  882.5709 ],
        [   0.     , 1323.8564 ]], dtype=float32)>,
 <tf.Tensor: shape=(2,), dtype=float32, numpy=array([ 0.      , 44.128548], dtype=float32)>]

In [16]:
x1 = tf.Variable(5.0)                   # trainable (the default)
x2 = tf.Variable(5.0, trainable=False)  # explicitly marked as non-trainable

In [17]:
# Computed outside any GradientTape; the result is a plain tf.Tensor,
# not a tf.Variable.
x3 = x1 + x2
x3

<tf.Tensor: shape=(), dtype=float32, numpy=10.0>

In [18]:
x4 = tf.constant(5.0)  # constant tensor, not a tf.Variable

In [19]:
with tf.GradientTape() as tape:
    y = x1**2 + x2**2 + x3**2 + x4**2

# Only x1 receives a gradient, because the tape automatically watches
# trainable variables and nothing else:
#   x2 -> variable created with trainable=False
#   x3 -> tensor computed outside the tape
#   x4 -> constant
grad = tape.gradient(y, [x1, x2, x3, x4])
grad

[<tf.Tensor: shape=(), dtype=float32, numpy=10.0>, None, None, None]

In [20]:
x1 = tf.constant(5.0)
x2 = tf.Variable(3.0)

with tf.GradientTape() as tape:
    # A constant is not tracked automatically, so x1 must be watched
    # explicitly for its gradient to be available.
    tape.watch(x1)
    y = x1**2 + x2**2

In [21]:
# Both gradients are available: x1 was watched explicitly and x2 is a
# trainable variable (dy/dx1 = 2*x1 = 10, dy/dx2 = 2*x2 = 6).
grad = tape.gradient(y, [x1, x2])
grad

[<tf.Tensor: shape=(), dtype=float32, numpy=10.0>,
 <tf.Tensor: shape=(), dtype=float32, numpy=6.0>]

In [22]:
# With watch_accessed_variables=False the tape records only what is passed
# to tape.watch(); variables are no longer watched automatically.
with tf.GradientTape(watch_accessed_variables=False) as tape:
    tape.watch(x1)
    y = x1**2 + x2**2

# x2 was never watched, so its gradient comes back as None.
grad = tape.gradient(y, [x1, x2])
grad

[<tf.Tensor: shape=(), dtype=float32, numpy=10.0>, None]