# TensorFlow 2.0 alpha - Gradient Tape and Automatic Differentiation
### A technique for optimizing ML models

In [1]:
from __future__ import absolute_import, division, print_function, unicode_literals

import tensorflow as tf

  from ._conv import register_converters as _register_converters


## Gradient Tape
#### Use tf.GradientTape for Automatic Differentiation - finding the Gradient of a computation with respect to its Input Variables
#### TF records all operations in this context onto a Tape - then traces it, using the Gradients of each operation, to compute the Gradients of each computation (using reverse mode differentiation)

In [3]:
x = tf.ones(shape=[2, 2])

# Operations executed inside the tape context are recorded for differentiation.
with tf.GradientTape() as t:
    # x is a constant tensor, so the tape is told explicitly to track it.
    t.watch(x)
    y = tf.reduce_sum(x)
    z = tf.multiply(y, y)

# Derivative of z with respect to x (the original input tensor).
# z = (sum of x)^2, so every entry of dz/dx equals 2 * sum(x) = 8.
dz_dx = t.gradient(z, x)
for i, j in [(0, 0), (0, 1), (1, 0), (1, 1)]:
    assert dz_dx[i][j].numpy() == 8.0

#### You can also request Gradients of the output with respect to intermediate values computed inside a Taped context

In [4]:
x = tf.ones(shape=[2, 2])

with tf.GradientTape() as t:
    t.watch(x)
    y = tf.reduce_sum(x)   # intermediate value: sum of the four ones
    z = tf.multiply(y, y)  # output: y squared

# Use the tape to differentiate z with respect to the intermediate value y.
# dz/dy = 2 * y, and y is 4 here.
dz_dy = t.gradient(z, y)
assert dz_dy.numpy() == 8.0

#### Create a Persistent GradientTape to allow multiple calls to the gradient method - otherwise, the Tape's resources are released as soon as the gradient method is called

In [5]:
x = tf.constant(3.0)

# persistent=True lets gradient() be called more than once on the same tape.
with tf.GradientTape(persistent=True) as t:
    t.watch(x)
    y = tf.multiply(x, x)
    z = tf.multiply(y, y)

dz_dx = t.gradient(z, x)  # z = x^4, so dz/dx = 4 * x^3 = 108.0 at x = 3
dy_dx = t.gradient(y, x)  # y = x^2, so dy/dx = 2 * x = 6.0 at x = 3

# Drop the reference to the tape now that both gradients have been taken.
del t

# Recording Control Flow
#### Tapes record operations as they are executed, so Python Control Flow (ifs, whiles, etc.) is handled naturally

In [6]:
def f(x, y):
    """Multiply a running product by x for each loop index strictly between 1 and 5.

    Args:
        x: Factor passed to tf.multiply on every qualifying iteration.
        y: Number of loop iterations (passed to range).

    Returns:
        The Python float 1.0 if no index in range(y) qualifies; otherwise
        the result of the repeated tf.multiply calls.
    """
    product = 1.0
    for step in range(y):
        # Only indices 2, 3 and 4 contribute a multiplication.
        if step <= 1 or step >= 5:
            continue
        product = tf.multiply(product, x)
    return product

def grad(x, y):
    """Return the gradient of f(x, y) with respect to x.

    Args:
        x: Value to differentiate with respect to; it is watched on the tape.
        y: Iteration count forwarded unchanged to f.

    Returns:
        Whatever GradientTape.gradient yields for f's output with respect to x.
    """
    with tf.GradientTape() as tape:
        tape.watch(x)
        result = f(x, y)
    return tape.gradient(result, x)

x = tf.convert_to_tensor(2.0)

# f multiplies by x only for loop indices 2, 3 and 4:
#   y = 6 or 5 -> x^3, gradient 3 * x^2 = 12.0 at x = 2
#   y = 4      -> x^2, gradient 2 * x   = 4.0  at x = 2
for steps, expected in ((6, 12.0), (5, 12.0), (4, 4.0)):
    assert grad(x, steps).numpy() == expected

# Higher-order Gradients
#### Operations inside the GradientTape Context Manager are recorded for Automatic Differentiation
#### Gradients computed in this context are recorded as well - With this, the API works for higher-order gradients too

In [7]:
# TF variable with initial value 1.0
x = tf.Variable(1.0)

with tf.GradientTape() as t:
    with tf.GradientTape() as t2:
        y = x * x * x

    # Take the first derivative while still inside t's context, so that the
    # gradient computation is itself recorded and can be differentiated again.
    dy_dx = t2.gradient(y, x)

# Second derivative: differentiate the recorded first derivative.
d2y_dx2 = t.gradient(dy_dx, x)

assert dy_dx.numpy() == 3.0    # 3 * x^2 at x = 1
assert d2y_dx2.numpy() == 6.0  # 6 * x at x = 1