## Automatic differentiation is a very interesting concept: it is used to compute derivatives (including partial derivatives) of a result with respect to its input variables.
### Interesting topic; worth learning more about when possible.

#### TensorFlow implementation details: https://www.tensorflow.org/guide/autodiff

In [1]:
import tensorflow as tf
import numpy as np

INFO:tensorflow:Enabling eager execution
INFO:tensorflow:Enabling v2 tensorshape
INFO:tensorflow:Enabling resource variables
INFO:tensorflow:Enabling tensor equality
INFO:tensorflow:Enabling control flow v2


In [3]:
x = tf.Variable(initial_value=3.0)

# Record the forward computation y = x^2 so that it can be differentiated.
with tf.GradientTape() as tape:
    y = x**2

# Analytically dy/dx = 2x, i.e. 6.0 at x = 3.0.
dy_dx = tape.gradient(target=y, sources=x)
dy_dx.numpy()

6.0

In [4]:
# Model parameters: a randomly initialised 3x2 weight matrix and a zero bias of length 2.
w = tf.Variable(initial_value=tf.random.normal(shape=(3, 2)), name='w')
b = tf.Variable(initial_value=tf.zeros(shape=2, dtype=tf.float32), name='b')
# A single input row with three features.
x = [[1., 2., 3.]]

# persistent=True keeps the tape usable for more than one gradient() call.
with tf.GradientTape(persistent=True) as tape:
    # Affine map: (1x3 input) @ (3x2 weights) + bias.
    y = x @ w + b
    # Scalar loss: the mean of the squared outputs.
    loss = tf.reduce_mean(y**2)

# Gradients come back in the same order as the list of sources.
dl_dw, dl_db = tape.gradient(loss, [w, b])
dl_db

<tf.Tensor: shape=(2,), dtype=float32, numpy=array([-1.1915586,  3.9254484], dtype=float32)>

### In the cell above, instead of passing a list of variables, we can pass a dictionary of variables to the `gradient` function.

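For example (calling `tape.gradient` a second time works here because the tape was created with `persistent=True`):

```python
my_vars = {
    'w': w,
    'b': b
}
grad = tape.gradient(loss, my_vars)
grad['b']
```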

In [5]:
# One input row with three features, fed to a 2-unit dense layer with ReLU.
x = tf.constant([[1., 2., 3.]])
layer = tf.keras.layers.Dense(units=2, activation='relu')

with tf.GradientTape() as tape:
    # Run the layer on the input, then reduce its output to a scalar loss.
    y = layer(x)
    loss = tf.reduce_mean(y**2)

# Differentiate the loss with respect to each of the layer's trainable variables.
grad = tape.gradient(loss, layer.trainable_variables)

# Pair every variable with its gradient and report the gradient's shape.
for var, g in zip(layer.trainable_variables, grad):
    print(f'{var.name}, shape: {g.shape}')

dense/kernel:0, shape: (3, 2)
dense/bias:0, shape: (2,)


In [6]:
# A trainable variable
x0 = tf.Variable(3.0, name='x0')
# Not trainable
x1 = tf.Variable(3.0, name='x1', trainable=False)
# Not a Variable: A variable + tensor returns a tensor.
x2 = tf.Variable(2.0, name='x2') + 1.0
# Not a variable
x3 = tf.constant(3.0, name='x3')

with tf.GradientTape() as tape:
  y = (x0**2) + (x1**2) + (x2**2)

grad = tape.gradient(y, [x0, x1, x2, x3])

for g in grad:
    print(g)

tf.Tensor(6.0, shape=(), dtype=float32)
None
None
None


In [8]:
x0 = tf.Variable(0.0)
x1 = tf.Variable(10.0)

# watch_accessed_variables=False switches off the default behaviour of watching
# every accessed variable; only what is passed to tape.watch() is tracked (here: x1).
with tf.GradientTape(watch_accessed_variables=False) as tape:
    tape.watch(x1)
    y0 = tf.math.sin(x0)
    y1 = tf.nn.softplus(x1)
    y = y0 + y1
    ys = tf.reduce_sum(y)

# d(ys)/d(x1) = exp(x1) / (1 + exp(x1)) = sigmoid(x1).
# x0 is not watched, so its entry in the result is None.
grad = tape.gradient(ys, {'x0': x0, 'x1': x1})

print('dy/dx0:', grad['x0'])
print('dy/dx1:', grad['x1'].numpy())

dy/dx0: None
dy/dx1: 0.9999546


### **Note:** To compute multiple gradients over the same computation, create the gradient tape with `persistent=True`.

In [18]:
# Demonstrates that a gradient does not flow back through an in-place variable update.
x0 = tf.Variable(3.0)
x1 = tf.Variable(0.0)

with tf.GradientTape() as tape:
    # In-place update: x1 <- x1 + x0. The call's return value is kept as x3.
    x3 = x1.assign_add(x0)
    # Numerically y equals (x1 + x0)**2, but it is computed from the updated
    # state of x1 rather than from an expression involving x0.
    y = x3**2   # y = (x1 + x0)**2 in value only

    # Show what kind of object assign_add returned.
    print(type(x3))

# Mathematically dy/dx0 would be 2*(x1 + x0), but the tape reports None here:
# the stateful assignment leaves y unconnected to x0 on the tape.
print(tape.gradient(y, x0))   # expected to print None

<class 'tensorflow.python.ops.resource_variable_ops._UnreadVariable'>
None
