In [1]:
import tensorflow as tf

## Gradient of a Variable

In [2]:
# Differentiate y = x * x with respect to a tf.Variable.
x = tf.Variable(3.0)
with tf.GradientTape() as tape:
  y = x * x
# dy/dx = 2 * x, evaluated at x = 3.
dy_dx = tape.gradient(target=y, sources=x)
print(dy_dx)

tf.Tensor(6.0, shape=(), dtype=float32)


## Gradient of a Constant

In [3]:
# Same computation, but x is now a constant and the tape is not told to watch it.
x = tf.constant(3.0)
with tf.GradientTape() as tape:
  y = x * x
# Nothing connects y to a watched tensor, so the result printed below is None.
dy_dx = tape.gradient(target=y, sources=x)
print(dy_dx)


None


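The gradient is `None` because a constant is not a variable, so the tape does not track it. We have to ask the tape to `watch` it explicitly!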

In [4]:
x = tf.constant(3.0)
with tf.GradientTape() as tape:
  # x is a constant rather than a variable, so it has to be watched explicitly.
  tape.watch(x)
  y = x * x
# dy/dx = 2 * x, evaluated at x = 3.
dy_dx = tape.gradient(target=y, sources=x)
print(dy_dx)


tf.Tensor(6.0, shape=(), dtype=float32)


## Non-persistent vs. Persistent Tape

In [5]:
x = tf.constant(3.0)
with tf.GradientTape() as g:
  g.watch(x)
  y = x * x
  z = y * y
dz_dx = g.gradient(z, x)  # dz/dx = 4 * x^3, evaluated at x = 3
print(dz_dx)

# The tape above is not persistent, so calling gradient() on it a second time
# raises RuntimeError. The error is the point of this cell, so catch and show
# it instead of letting it abort a "Run All" of the notebook.
try:
  dy_dx = g.gradient(y, x)
  print(dy_dx)
except RuntimeError as err:
  print("RuntimeError:", err)


tf.Tensor(108.0, shape=(), dtype=float32)


RuntimeError: ignored

In [6]:
x = tf.constant(3.0)
# persistent=True allows gradient() to be called more than once on this tape.
with tf.GradientTape(persistent=True) as g:
  g.watch(x)
  y = x * x
  z = y * y
dz_dx = g.gradient(z, x)  # (4*x^3 at x = 3)
print(dz_dx)

dy_dx = g.gradient(y, x)  # (2*x at x = 3)
print(dy_dx)

# Drop the reference to the persistent tape once all gradients are computed,
# so the resources it holds can be released.
del g

tf.Tensor(108.0, shape=(), dtype=float32)
tf.Tensor(6.0, shape=(), dtype=float32)


## GradientTape on a Model

In [7]:
mnist = tf.keras.datasets.mnist

# Fetch the train and test splits as (inputs, labels) pairs.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Flatten every example into a 784-element vector and divide the values by 255.
x_train = x_train.reshape(-1, 784) / 255.0
x_test = x_test.reshape(-1, 784) / 255.0

In [8]:
# Three stacked Dense layers of 10 units each, fed with 784-element inputs.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(10, activation=tf.nn.relu, input_shape=(784,)))
model.add(tf.keras.layers.Dense(10, activation=tf.nn.relu))
# The last layer has no activation, so the model outputs raw logits.
model.add(tf.keras.layers.Dense(10))

# from_logits=True matches the activation-free output layer above.
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

In [9]:
def loss(model, x, y, training):
  """Evaluate the module-level `loss_object` for `model` on the batch `(x, y)`.

  Args:
    model: Callable invoked as `model(x, training=training)` to get predictions.
    x: Inputs forwarded to the model.
    y: Targets, handed to `loss_object` as `y_true`.
    training: Forwarded to the model. It only matters when the model has
      layers that behave differently in training and inference (e.g. Dropout).

  Returns:
    Whatever `loss_object(y_true=y, y_pred=predictions)` returns.
  """
  predictions = model(x, training=training)
  return loss_object(y_true=y, y_pred=predictions)

# Loss over the full training set, with the model called in inference mode.
l = loss(model=model, x=x_train, y=y_train, training=False)
print("Loss test: {}".format(l))


Loss test: 2.3115293979644775


### Use the `tf.GradientTape` context to calculate the gradients used to optimize your model:

In [10]:
def grad(model, inputs, targets):
  """Compute the training-mode loss for one batch and its gradients.

  Args:
    model: Model passed to `loss`; its `trainable_variables` are the
      differentiation sources.
    inputs: Batch inputs forwarded to `loss`.
    targets: Batch targets forwarded to `loss`.

  Returns:
    A `(loss_value, gradients)` tuple, where `gradients` is the result of
    differentiating `loss_value` with respect to `model.trainable_variables`.
  """
  # The loss has to be computed inside the tape context to be recorded.
  with tf.GradientTape() as tape:
    loss_value = loss(model, inputs, targets, training=True)
  gradients = tape.gradient(loss_value, model.trainable_variables)
  return loss_value, gradients

In [11]:
# Stochastic gradient descent with a learning rate of 0.01.
optimizer = tf.keras.optimizers.SGD(learning_rate=1e-2)

In [12]:
# Loss and gradients for the first 256 training examples.
loss_value, grads = grad(model, inputs=x_train[:256], targets=y_train[:256])

In [13]:
# Report the optimizer's step counter and the loss before any update is applied.
print("Step: {}, Initial Loss: {}".format(
    optimizer.iterations.numpy(),
    loss_value.numpy(),
))

Step: 0, Initial Loss: 2.3078858852386475


In [14]:
# Pair each gradient with its variable and apply a single optimizer update.
optimizer.apply_gradients(grads_and_vars=zip(grads, model.trainable_variables))

<tf.Variable 'UnreadVariable' shape=() dtype=int64, numpy=1>

In [15]:
# Re-evaluate the loss on the same 256 examples after the update.
print("Step: {},         Loss: {}".format(
    optimizer.iterations.numpy(),
    loss(model, x_train[:256], y_train[:256], training=False).numpy(),
))


Step: 1,         Loss: 2.305161952972412


### Use a loop to train the model

In [16]:
# Number of examples per batch.
batch_size = 256

# Slice the arrays into (input, label) examples, then group them into batches.
train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train)).batch(batch_size)
test_dataset = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(batch_size)


In [17]:
# Replace the SGD optimizer used above with Adam (default settings).
optimizer = tf.keras.optimizers.Adam()
# The model outputs logits, hence from_logits=True.
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

In [18]:
epochs = 5
for epoch in range(epochs):
    print("\nStart of epoch %d" % epoch)
    # Walk through the training set one batch at a time.
    for step, (x_batch_train, y_batch_train) in enumerate(train_dataset):
        # Run the forward pass and the loss inside a GradientTape so that
        # both are recorded and can be differentiated afterwards.
        with tf.GradientTape() as tape:
            logits = model(x_batch_train, training=True)
            loss_value = loss_fn(y_batch_train, logits)
        # Differentiate the batch loss with respect to the trainable weights,
        # then let the optimizer update those same weights.
        weights = model.trainable_weights
        grads = tape.gradient(loss_value, weights)
        optimizer.apply_gradients(zip(grads, weights))
        # Only report the loss on every 200th batch.
        if step % 200 != 0:
            continue
        print(
            "Training loss (for one batch) at step %d: %.4f"
            % (step, float(loss_value))
        )



Start of epoch 0
Training loss (for one batch) at step 0: 2.3052
Training loss (for one batch) at step 200: 0.6672

Start of epoch 1
Training loss (for one batch) at step 0: 0.5424
Training loss (for one batch) at step 200: 0.4090

Start of epoch 2
Training loss (for one batch) at step 0: 0.3562
Training loss (for one batch) at step 200: 0.3385

Start of epoch 3
Training loss (for one batch) at step 0: 0.2972
Training loss (for one batch) at step 200: 0.2924

Start of epoch 4
Training loss (for one batch) at step 0: 0.2667
Training loss (for one batch) at step 200: 0.2595
