In [2]:
# Gradient Tape :  Automatic differentiation

# Note that only tensors with real or complex dtypes are differentiable.

In [3]:
import tensorflow as tf
tf.enable_eager_execution()

In [4]:
# Example 1:

# function : x**2 
# y = x**2   # xSquare # derive if function X**2 is 2x if x = 3 the derivative output is 6

x = tf.Variable(3.0)

with tf.GradientTape() as tape:
    y = x**2
    
dy_dx = tape.gradient(y, x)  # First order derivative

dy_dx.numpy()

6.0

In [5]:
# Automatically Watching Variables

# If x were a trainable variable instead of a constant, there would be no need to tell the tape to watch it

x = tf.Variable(6.0, trainable=True)
with tf.GradientTape() as tape:
    y = x**3

print(tape.gradient(y, x).numpy()) 

108.0


In [6]:
# If we were to re-run this replacing the first line with: x = tf.constant(3.0)

x = tf.constant(3.0)
with tf.GradientTape() as tape:
    y = x**3

print(tape.gradient(y, x).numpy()) 

AttributeError: 'NoneType' object has no attribute 'numpy'

In [None]:
# watch_accessed_variables=False 
#    - If we don’t want GradientTape to watch all trainable variables automatically
# Disabling watch_accessed_variables gives us fine control over what variables we want to watch

x = tf.Variable(3.0, trainable=True)

with tf.GradientTape(watch_accessed_variables=False) as tape:
    y = x**3

print(tape.gradient(y, x))

In [None]:
# Higher-Order Derivatives

x = tf.Variable(2.0) # first oder output will be 6

with tf.GradientTape() as tape1:
    tape1.watch(x)
    with tf.GradientTape() as tape2:
        tape2.watch(x)
        y = 3*x**2
    dy_dx = tape2.gradient(y,x)     # tape1 :First order derivative
d2y_d2x = tape1.gradient(dy_dx, x)   # tape : Second Order derivative

print(dy_dx)

print(d2y_d2x)

In [None]:
import numpy as np

w = tf.Variable(tf.random.normal((3, 2)), name='w')
b = tf.Variable(tf.zeros(2, dtype=tf.float32), name='b')

x = tf.Variable([[1., 2., 3.]])

with tf.GradientTape(persistent=True) as tape:
    
    y = x @ w + b
    loss = tf.reduce_mean(y**2)
    
    
[dl_dw, dl_db]= tape.gradient(loss,[w,b])

In [8]:

# the GradientTape will not releases all the information stored inside of it for computational purposes.
# Default : persistent=False

# If we want to see all the varibales information,we can set persistent=True

a = tf.Variable(6.0, trainable=True)
b = tf.Variable(2.0, trainable=True)
with tf.GradientTape(persistent=True) as tape:
    y1 = a ** 2
    y2 = b ** 3
                                                                                                                                                                                                                                                                                                                                                
print(tape.gradient(y1, a).numpy())
print(tape.gradient(y2, b).numpy())

12.0
12.0


In [9]:
# stop_recording
# tape.stop_recording() temporarily pauses the tapes recording, leading to greater computation speed.

x = tf.Variable(3.0, trainable=True)
with tf.GradientTape() as tape:
    y = x**3
    with tape.stop_recording():
        print(tape.gradient(y, x).numpy()) # -> 27.0

27.0


In [10]:
# By default, GradientTape doesn’t track constants, so we must instruct it to with: tape.watch(variable)
# Below the code throwing error becuse tape.watch(x) commented

x = tf.constant(5.0)
with tf.GradientTape() as tape:
    #tape.watch(x)
    y = x**3
    
print(tape.gradient(y, x).numpy())

AttributeError: 'NoneType' object has no attribute 'numpy'

In [11]:
# Getting a gradient of None

# When a target is not connected to a source you will get a gradient of None.

x = tf.Variable(2.)
y = tf.Variable(3.)

with tf.GradientTape() as tape:
    z = y*y

print(tape.gradient(z,x))

None


In [12]:
# Zeros instead of None

x = tf.Variable([2., 2.])
y = tf.Variable(3.)


with tf.GradientTape() as tape:
  z = y**2
print(tape.gradient(z, x, unconnected_gradients=tf.UnconnectedGradients.ZERO))

tf.Tensor([0. 0.], shape=(2,), dtype=float32)


In [None]:
'''

Other Methods

.jacobian: “Computes the jacobian using operations recorded in context of this tape.”
   
.batch_jacobian: “Computes and stacks per-example jacobians.”

.reset: “Clears all information stored in this tape.”

.watched_variables: “Returns variables watched by this tape in order of
    
'''

In [16]:
# GradientTapes can be nested to compute higher-order derivatives. 

x = tf.Variable(2.0) # first oder output will be 6
with tf.GradientTape() as tape:
    
    y = 3*x**2
    
tape.gradient(y,x)

<tf.Tensor: id=220, shape=(), dtype=float32, numpy=12.0>

In [17]:
#  Replaced a variable with a tensor

x = tf.Variable(2.0)
for epoch in range(2):
    with tf.GradientTape() as tape:
        y = x+1
        
    print(type(x).__name__, ":", tape.gradient(y, x))

ResourceVariable : tf.Tensor(1.0, shape=(), dtype=float32)
ResourceVariable : tf.Tensor(1.0, shape=(), dtype=float32)


In [18]:
w = tf.Variable(tf.random.normal((3, 2)), name='w')
b = tf.Variable(tf.zeros(2, dtype=tf.float32), name='b')

my_vars = {
    'w': w,
    'b': b
}


x = tf.Variable([[1., 2., 3.]])

with tf.GradientTape(persistent=True) as tape:
    
    y = x @ w + b
    loss = tf.reduce_mean(y**2)
    
    
[dl_dw, dl_db]= tape.gradient(loss,[w,b])

grad = tape.gradient(loss, my_vars)

In [19]:
grad

{'w': <tf.Tensor: id=315, shape=(3, 2), dtype=float32, numpy=
 array([[-1.47831  , -2.9592826],
        [-2.95662  , -5.9185653],
        [-4.43493  , -8.877848 ]], dtype=float32)>,
 'b': <tf.Tensor: id=313, shape=(2,), dtype=float32, numpy=array([-1.47831  , -2.9592826], dtype=float32)>}

In [20]:
# Gradients with respect to a model

layer = tf.keras.layers.Dense(2, activation='relu')
x = tf.constant([[1., 2., 3.]])

with tf.GradientTape() as tape:
    
    # Forward pass
    y = layer(x)
    
    loss = tf.reduce_mean(y**2)
    
# Calculate gradients with respect to every trainable variable
grad = tape.gradient(loss, layer.trainable_variables) 

for var, g in zip(layer.trainable_variables, grad):
    print(f'{var.name}, shape: {g.shape}')

dense/kernel:0, shape: (3, 2)
dense/bias:0, shape: (2,)


In [21]:
# Example :

x0 = tf.Variable(3.0, name='x0')
x1 = tf.Variable(3.0, name='x1', trainable=False)
x2 = tf.Variable(2.0, name='x2') + 1.0
x3 = tf.constant(3.0, name='x3')

with tf.GradientTape() as tape:
    y = (x0**2) + (x1**2) + (x2**2)
    
grad = tape.gradient(y, [x0, x1, x2, x3])

for g in grad:
  print(g)


tf.Tensor(6.0, shape=(), dtype=float32)
None
None
None


In [22]:
x = tf.Variable(2.)

with tf.GradientTape() as tape:
  y = x * [3., 4.]

print(tape.gradient(y, x).numpy())

7.0


In [23]:
x = tf.linspace(-10.0, 10.0, 200+1)

with tf.GradientTape() as tape:
  tape.watch(x)
  y = tf.nn.sigmoid(x)

dy_dx = tape.gradient(y, x)

In [24]:
dy_dx

<tf.Tensor: id=431, shape=(201,), dtype=float32, numpy=
array([4.54166766e-05, 5.02143921e-05, 5.54590479e-05, 6.13294178e-05,
       6.77360949e-05, 7.47982340e-05, 8.26946052e-05, 9.14251650e-05,
       1.01049460e-04, 1.11627036e-04, 1.23396196e-04, 1.36356830e-04,
       1.50747219e-04, 1.66567232e-04, 1.84025266e-04, 2.03389267e-04,
       2.24718591e-04, 2.48400233e-04, 2.74523190e-04, 3.03384935e-04,
       3.35312623e-04, 3.70424765e-04, 4.09375789e-04, 4.52343491e-04,
       4.99922200e-04, 5.52467944e-04, 6.10514835e-04, 6.74626499e-04,
       7.45455211e-04, 8.23771697e-04, 9.10197268e-04, 1.00573874e-03,
       1.11131265e-03, 1.22783449e-03, 1.35663373e-03, 1.49891921e-03,
       1.65598630e-03, 1.82954292e-03, 2.02120445e-03, 2.23287800e-03,
       2.46652518e-03, 2.72454484e-03, 3.00929835e-03, 3.32372868e-03,
       3.67067871e-03, 4.05353727e-03, 4.47600102e-03, 4.94215591e-03,
       5.45618078e-03, 6.02310384e-03, 6.64806180e-03, 7.33687775e-03,
       8.09598900e-03

In [25]:
import matplotlib.pyplot as plt

plt.plot(x, y, label='y')
plt.plot(x, dy_dx, label='dy/dx')
plt.legend()
_ = plt.xlabel('x')

In [26]:
# Linear Regression

In [27]:
import numpy as np
import random
import pandas as pd

# Training data
x_train = np.asarray([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
y_train = np.asarray([i*10+5 for i in x_train]) # y = 10x+5

pd.DataFrame({"X":x_train,"Y":y_train})

optimizer = tf.keras.optimizers.SGD(0.01)

loss = []
a_a = []
b_b = []

# Trainable variables
a = tf.Variable(random.random(), trainable=True)
b = tf.Variable(random.random(), trainable=True)

for steps in range(10): 
    with tf.GradientTape() as t:
        
        # Function : (a * x_data) + b
        two_gate = tf.add(tf.multiply(a,x_train),b)
        
        # Define loss
        curr_loss = tf.square(tf.subtract(two_gate,50))
        
        a_a.append(a.numpy())
        b_b.append(b.numpy())
        
        grads = t.gradient(curr_loss, [a,b])
        optimizer.apply_gradients(zip(grads,[a,b]))
    
        loss.append(curr_loss.numpy())
        
import pandas as pd

pd.DataFrame(data = {'Loss': loss,'a':a_a,'b':b_b})

Unnamed: 0,Loss,a,b
0,"[2448.9119, 2414.0479, 2379.4338, 2345.07, 231...",0.35352,0.5135185
1,"[1520.0896, 171.03984, 4243.841, 13738.493, 28...",52.06655,11.01167
2,"[7688.501, 154954.62, 489442.2, 1011151.1, 172...",-305.9587,-37.6841
3,"[71910.3, 5829986.0, 20801920.0, 44987708.0, 7...",2146.375,318.1609
4,"[4630447.5, 283166080.0, 992453500.0, 21324928...",-14675.69,-2101.847
5,"[209230980.0, 13261590000.0, 46592573000.0, 10...",100694.2,14514.82
6,"[9896471000.0, 624168140000.0, 2192191900000.0...",-690562.1,-99431.01
7,"[465155560000.0, 29357091000000.0, 10311212000...",4736196.0,682073.1
8,"[21882160000000.0, 1380907800000000.0, 4850188...",-32482730.0,-4677787.0
9,"[1029273340000000.0, 6.4954745e+16, 2.2814198e...",222779900.0,32082340.0


In [28]:
# Polynomial Regression

# Training data
x_train = np.asarray([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
y_train = np.asarray([6*i**2 + 8*i + 2 for i in x_train]) # y = 6x^2 + 8x + 2

pd.DataFrame({'X':x_train,'Y':y_train})

Unnamed: 0,X,Y
0,0,2
1,1,16
2,2,42
3,3,80
4,4,130
5,5,192
6,6,266
7,7,352
8,8,450
9,9,560


In [29]:
optimizer = tf.keras.optimizers.SGD(0.01)

loss = []
a_a = []
b_b = []

# Trainable variables
a = tf.Variable(random.random(), trainable=True)
b = tf.Variable(random.random(), trainable=True)
c = tf.Variable(random.random(), trainable=True)

for steps in range(10): 
    with tf.GradientTape() as t:
        
        # Make prediction
        y_pred = a*x_train**2 + b*x_train + c
        
        # Calculate loss
        curr_loss = tf.square(tf.subtract(y_pred,50))
        
    
        #print(tf.math.reduce_mean(curr_loss).numpy())
        
        a_a.append(a.numpy())
        b_b.append(b.numpy())
        
        grads = t.gradient(curr_loss, [a,b])
        optimizer.apply_gradients(zip(grads,[a,b]))
    
        loss.append(curr_loss.numpy())
        
import pandas as pd

pd.DataFrame(data = {'Loss': loss,'a':a_a,'b':b_b})

Unnamed: 0,Loss,a,b
0,"[2475.923, 2371.8242, 2206.8738, 1987.9684, 17...",0.3333656,0.7239059
1,"[2475.923, 22720.342, 479937.56, 2484939.8, 78...",170.7756,29.71572
2,"[2475.923, 9661927000.0, 138442440000.0, 67469...",-87769.05,-10476.29
3,"[2475.923, 2539728500000000.0, 3.6413227e+16, ...",45015500.0,5380274.0
4,"[2475.923, 6.680948e+20, 9.578762e+21, 4.66866...",-23088040000.0,-2759485000.0
5,"[2475.923, 1.7574711e+26, 2.5197618e+27, 1.228...",11841650000000.0,1415315000000.0
6,"[2475.923, 4.6231524e+31, 6.628412e+32, 3.2306...",-6073475000000000.0,-725902400000000.0
7,"[2475.923, 1.2161533e+37, 1.7436514e+38, inf, ...",3.11503e+18,3.723087e+17
8,"[2475.923, inf, inf, inf, inf, inf, inf, inf, ...",-1.597671e+21,-1.909538e+20
9,"[2475.923, inf, inf, inf, inf, inf, inf, inf, ...",8.194308e+23,9.793846e+22
