# tf.GradientTape

In [1]:
import tensorflow as tf
import numpy as np

## Autodifferentiation (univariate and multivariate)

In [2]:
# Two independent scalar inputs, both starting at 2.0
x1 = tf.Variable(2.0, trainable=True)
x2 = tf.Variable(2.0, trainable=True)

# persistent=True because this tape is queried three times below
with tf.GradientTape(persistent=True) as tape:
    y = x1**5 + x2**2 + 5

# Partial derivatives one at a time, then both in a single call
dy_dx1 = tape.gradient(y, x1)
dy_dx2 = tape.gradient(y, x2)
dy_dx = tape.gradient(y, [x1, x2])

print(y.numpy(), dy_dx1.numpy(), dy_dx2.numpy(), sep="\n")
print(dy_dx)

41.0
80.0
4.0
[<tf.Tensor: shape=(), dtype=float32, numpy=80.0>, <tf.Tensor: shape=(), dtype=float32, numpy=4.0>]


In [3]:
def derivative(x):
    """Return the derivative of ``x**5`` evaluated at ``x``.

    Args:
        x: Value used to initialise the ``tf.Variable`` that is
            differentiated (a Python float in this notebook).

    Returns:
        The result of ``tape.gradient`` for ``y = x**5`` with respect to
        the variable created from ``x``.
    """
    x_var = tf.Variable(x, trainable=True)
    # Only one gradient is requested, so a default (non-persistent) tape is
    # sufficient and its resources are released right after the call.
    with tf.GradientTape() as tape:
        y = x_var**5
    return tape.gradient(y, x_var)

In [4]:
# d/dx x**5 = 5*x**4, so the value expected at x = 3 is 405
print(derivative(3.0).numpy())

405.0


## Higher Order Derivatives

In [5]:
x = tf.Variable(2.0, trainable=True)

# Nested tapes: the outer tape records the inner tape's gradient computation,
# which lets the first derivative be differentiated again.
with tf.GradientTape() as tape2:
    with tf.GradientTape() as tape1:
        y = x**5
    # First derivative, taken while tape2 is still recording
    dy_dx = tape1.gradient(y, x)
# Second derivative
d2y_dx2 = tape2.gradient(dy_dx, x)

print(dy_dx.numpy(), d2y_dx2.numpy(), sep="\n")

80.0
160.0


## Training a Regression Model

In [6]:
# Training data: the integers 0..10 and targets from y = 6x^2 + 8x + 2
x_train = np.arange(11)
y_train = 6 * x_train**2 + 8 * x_train + 2

In [7]:
# Trainable coefficients of a*x^2 + b*x + c, each initialised from np.random.random()
a, b, c = (tf.Variable(np.random.random(), trainable=True) for _ in range(3))

In [8]:
# Loss: absolute error between targets and predictions
def loss_function(real_y, pred_y):
    """Return ``|real_y - pred_y|`` computed with ``tf.abs``.

    No reduction is applied here; the result keeps whatever shape the
    subtraction produces.
    """
    residual = real_y - pred_y
    return tf.abs(residual)

In [9]:
eta = 0.001  # step size for the manual gradient-descent update


def epoch(real_x, real_y):
    """Run one gradient-descent update of ``a``, ``b`` and ``c`` on the given data.

    Predicts ``a*real_x**2 + b*real_x + c``, evaluates ``loss_function``
    against ``real_y`` and moves each coefficient by ``eta`` times its
    gradient, in the descending direction. The variables are updated in
    place; nothing is returned.

    Args:
        real_x: Input values.
        real_y: Target values corresponding to ``real_x``.
    """
    # A single gradient call follows, so the tape does not need to be persistent.
    with tf.GradientTape() as tape:
        # Make prediction
        pred_y = a * real_x**2 + b * real_x + c
        # Calculate loss
        poly_loss = loss_function(real_y, pred_y)

    # loss_function does not reduce its result; for a non-scalar target
    # tape.gradient returns the gradient of the sum of its elements.
    a_grad, b_grad, c_grad = tape.gradient(poly_loss, (a, b, c))

    # Same rule for every coefficient: v <- v - eta * dL/dv
    for variable, grad in ((a, a_grad), (b, b_grad), (c, c_grad)):
        variable.assign_sub(eta * grad)

In [10]:
# Apply the full-batch update 10,000 times
for _ in range(10_000):
    epoch(x_train, y_train)

In [11]:
# Fitted coefficients; the training targets were generated with 6, 8 and 2
a, b, c

(<tf.Variable 'Variable:0' shape=() dtype=float32, numpy=6.395968>,
 <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=7.596123>,
 <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=2.0090473>)

## Training a Neural Network using Gradient Tape

### MNIST dataset

In [12]:
from tensorflow.keras.layers import Conv2D, Flatten, Dense, Dropout, MaxPooling2D
from tensorflow.keras.models import Sequential
from tensorflow.keras.initializers import RandomNormal
from tensorflow.keras.datasets import mnist
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt
# import random
import math
%matplotlib inline

In [13]:
# Load MNIST, scale pixels to [0, 1], add a trailing channel axis and one-hot
# encode the ten digit classes.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Cast before dividing: dividing the integer pixel arrays by 255 directly
# yields float64, which doubles memory compared with float32.
x_train = (x_train.astype("float32") / 255).reshape((-1, 28, 28, 1))
x_test = (x_test.astype("float32") / 255).reshape((-1, 28, 28, 1))

y_train = tf.keras.utils.to_categorical(y_train, 10)
y_test = tf.keras.utils.to_categorical(y_test, 10)

In [14]:
# define model
# hyperparameters
batch_size = 128
epochs = 25
# `learning_rate` is the supported argument name; `lr` is a deprecated alias
# that triggers a warning when the optimizer is constructed.
optimizer = Adam(learning_rate=0.001)
weight_init = RandomNormal()

# build model: two conv layers -> max-pool -> dropout -> dense -> dropout -> 10-way softmax
model = Sequential([
    Conv2D(32, kernel_size=(3, 3), activation='relu', kernel_initializer=weight_init, input_shape=(28, 28, 1)),
    Conv2D(64, (3, 3), activation='relu', kernel_initializer=weight_init),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    Flatten(),
    Dense(128, activation='relu', kernel_initializer=weight_init),
    Dropout(0.5),
    Dense(10, activation='softmax', kernel_initializer=weight_init),
])

  super(Adam, self).__init__(name, **kwargs)


In [15]:
# Print each layer with its output shape and parameter count
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 26, 26, 32)        320       
                                                                 
 conv2d_1 (Conv2D)           (None, 24, 24, 64)        18496     
                                                                 
 max_pooling2d (MaxPooling2D  (None, 12, 12, 64)       0         
 )                                                               
                                                                 
 dropout (Dropout)           (None, 12, 12, 64)        0         
                                                                 
 flatten (Flatten)           (None, 9216)              0         
                                                                 
 dense (Dense)               (None, 128)               1179776   
                                                        

In [16]:
# training model
# One optimisation step on a single batch; the MNIST counterpart of the
# manual update performed by epoch() in the regression example.
def step(real_x, real_y):
    """Apply one optimizer update to ``model`` using a single batch.

    Args:
        real_x: Batch of images as a NumPy array; it is reshaped to
            ``(-1, 28, 28, 1)`` before the forward pass.
        real_y: Targets for the batch, passed to
            ``categorical_crossentropy`` (one-hot labels in this notebook).
    """
    with tf.GradientTape() as tape:
        # training=True switches the Dropout layers on. Without it the model
        # is called in inference mode and dropout is never applied while
        # the weights are being fitted.
        pred_y = model(real_x.reshape((-1, 28, 28, 1)), training=True)
        model_loss = tf.keras.losses.categorical_crossentropy(real_y, pred_y)

    model_gradients = tape.gradient(model_loss, model.trainable_variables)
    optimizer.apply_gradients(zip(model_gradients, model.trainable_variables))

In [17]:
# training loop
# Only complete batches are used; a trailing partial batch is skipped.
bat_per_epoch = len(x_train) // batch_size

# The loop variable is deliberately not named `epoch`: that name is the
# regression update function defined earlier in this notebook, and rebinding
# it to an int would break re-running that section.
for epoch_idx in range(epochs):
    print('=', end='')
    for i in range(bat_per_epoch):
        n = i * batch_size
        step(x_train[n:n + batch_size], y_train[n:n + batch_size])

model.compile(optimizer=optimizer, loss=tf.losses.categorical_crossentropy, metrics=['acc']) # Compile just for evaluation
print('\nAccuracy:', model.evaluate(x_test, y_test, verbose=0)[1])

Accuracy: 0.9889000058174133


In [18]:
# Run the trained model on the test images and check the shape of the predictions
y = model.predict(x_test)
print(y.shape)

(10000, 10)
