In [1]:
import tensorflow as tf

In [2]:
# x is a trainable variable, so a gradient tape tracks it automatically.
x = tf.Variable([1, 2, 3], dtype=tf.float32, trainable=True)
# y is a plain tensor, not a variable.
y = tf.convert_to_tensor([4, 5, 6], dtype=tf.float32)

In [5]:
# Scalar objective: z = sum_i (x_i^2 + x_i * y_i)^2
z = tf.reduce_sum((x * x + x * y) ** 2)

In [6]:
# Show the scalar value: 5^2 + 14^2 + 27^2 = 950.
z

<tf.Tensor: shape=(), dtype=float32, numpy=950.0>

How do we get the derivatives of z with respect to x and y?
`tf.GradientTape` records the operations executed inside its context, building the computational graph that it later traverses backward to compute the gradients.

In [12]:
# A non-persistent tape can serve only one gradient() call; persistent=True
# lets the same tape be queried several times.
# NOTE: per the TensorFlow docs, a persistent tape keeps its resources until
# the tape object is garbage collected, so `del tape` once it is no longer needed.
with tf.GradientTape(persistent=True) as tape:
    # Operations executed inside this context are recorded by the tape,
    # which is what makes the later gradient computation possible.
    z = tf.reduce_sum((x * x + x * y) ** 2)

In [9]:
# Partial derivatives of z with respect to each component of x (x1, x2, x3):
# dz/dx_i = 2 * (x_i^2 + x_i * y_i) * (2 * x_i + y_i)
tape.gradient(target=z, sources=x)

<tf.Tensor: shape=(3,), dtype=float32, numpy=array([ 60., 252., 648.], dtype=float32)>

In [13]:
# Requesting both gradients at once: the entry for y comes back as None,
# because y is a plain tensor rather than a variable and the tape was not
# told to watch it.
tape.gradient(z, [x, y])

[<tf.Tensor: shape=(3,), dtype=float32, numpy=array([ 60., 252., 648.], dtype=float32)>,
 None]

In [14]:
# To differentiate with respect to a plain tensor (anything that is not a
# trainable variable), register it explicitly with watch().
with tf.GradientTape(persistent=True) as tape:
    tape.watch(y)
    z = tf.reduce_sum((x * x + x * y) ** 2)

In [15]:
# Gradients of z with respect to both x and y:
#   dz/dx_i = 2 * (x_i^2 + x_i * y_i) * (2 * x_i + y_i)
#   dz/dy_i = 2 * (x_i^2 + x_i * y_i) * x_i
tape.gradient(target=z, sources=[x, y])

[<tf.Tensor: shape=(3,), dtype=float32, numpy=array([ 60., 252., 648.], dtype=float32)>,
 <tf.Tensor: shape=(3,), dtype=float32, numpy=array([ 10.,  56., 162.], dtype=float32)>]

## Second derivative of z with respect to x

In [17]:
# Nested tapes: the inner tape yields the first derivative, and because that
# gradient() call runs inside the outer tape's context, the outer tape
# records it and can differentiate it again.
with tf.GradientTape() as tape2:
    with tf.GradientTape() as tape:
        z = tf.reduce_sum((x * x + x * y) ** 2)
    dx = tape.gradient(target=z, sources=x)
# dx is a vector, so this is the gradient of sum(dx) with respect to x.
# Each dx_i depends only on x_i here, so the result equals the element-wise
# second derivatives d2z/dx_i^2.
dx2 = tape2.gradient(target=dx, sources=x)

In [18]:
# Element-wise second derivative:
# d2z/dx_i^2 = 2 * ((2 * x_i + y_i)^2 + 2 * (x_i^2 + x_i * y_i))
dx2

<tf.Tensor: shape=(3,), dtype=float32, numpy=array([ 92., 218., 396.], dtype=float32)>

#### NB: inside a `GradientTape` context, always use TensorFlow (`tf`) operations rather than NumPy ones; the tape cannot record NumPy operations, so they are left out of the computational graph and no gradient can flow through them.