# Computing Gradients Using Autodiff

In [3]:
import tensorflow as tf

def f(w1, w2):
 return 3 * w1 ** 2 + 2 * w1 * w2

w1, w2 = 5, 3
eps = 1e-6
(f(w1 + eps, w2) - f(w1, w2)) / eps
(f(w1, w2 + eps) - f(w1, w2)) / eps

w1, w2 = tf.Variable(5.), tf.Variable(3.)
with tf.GradientTape() as tape:
 z = f(w1, w2)
gradients = tape.gradient(z, [w1, w2])

with tf.GradientTape() as tape:
 z = f(w1, w2)
 dz_dw1 = tape.gradient(z, w1) # => tensor 36.0

In [4]:
dz_dw1

<tf.Tensor: shape=(), dtype=float32, numpy=36.0>

In [5]:
with tf.GradientTape(persistent=True) as tape:
 z = f(w1, w2)
dz_dw1 = tape.gradient(z, w1) # => tensor 36.0
dz_dw2 = tape.gradient(z, w2) # => tensor 10.0, works fine now!
del tape

In [6]:
dz_dw1

<tf.Tensor: shape=(), dtype=float32, numpy=36.0>

In [7]:
dz_dw2

<tf.Tensor: shape=(), dtype=float32, numpy=10.0>

In [8]:
c1, c2 = tf.constant(5.), tf.constant(3.)
with tf.GradientTape() as tape:
 z = f(c1, c2)
gradients = tape.gradient(z, [c1, c2])

In [9]:
gradients

[None, None]

In [10]:
with tf.GradientTape() as tape:
 tape.watch(c1)
 tape.watch(c2)
 z = f(c1, c2)
gradients = tape.gradient(z, [c1, c2])

In [11]:
gradients

[<tf.Tensor: shape=(), dtype=float32, numpy=36.0>,
 <tf.Tensor: shape=(), dtype=float32, numpy=10.0>]

In [12]:
with tf.GradientTape(persistent=True) as hessian_tape:
 with tf.GradientTape() as jacobian_tape:
  z = f(w1, w2)
 jacobians = jacobian_tape.gradient(z, [w1, w2])
hessians = [hessian_tape.gradient(jacobian, [w1, w2])
 for jacobian in jacobians]
del hessian_tape

In [13]:
hessians

[[<tf.Tensor: shape=(), dtype=float32, numpy=6.0>,
  <tf.Tensor: shape=(), dtype=float32, numpy=2.0>],
 [<tf.Tensor: shape=(), dtype=float32, numpy=2.0>, None]]

In [15]:
def f(w1, w2):
 return 3 * w1 ** 2 + tf.stop_gradient(2 * w1 * w2)
with tf.GradientTape() as tape:
 z = f(w1, w2) # same result as without stop_gradient()
gradients = tape.gradient(z, [w1, w2]) # => returns [tensor 30., None]

In [16]:
gradients

[<tf.Tensor: shape=(), dtype=float32, numpy=30.0>, None]

In [17]:
@tf.custom_gradient
def my_better_softplus(z):
 exp = tf.exp(z)
 def my_softplus_gradients(grad):
  return grad / (1 + 1 / exp)
 return tf.math.log(exp + 1), my_softplus_gradients

x = tf.Variable([100.])
with tf.GradientTape() as tape:
  z = my_better_softplus(x)

tape.gradient(z, [x])

[<tf.Tensor: shape=(1,), dtype=float32, numpy=array([1.], dtype=float32)>]

# Custom Training Loops


In [18]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Define the neural network architecture
class SimpleNN(nn.Module):
    def __init__(self):
        super(SimpleNN, self).__init__()
        self.flatten = nn.Flatten()
        self.hidden = nn.Linear(28*28, 128)
        self.relu = nn.ReLU()
        self.output = nn.Linear(128, 10)

    def forward(self, x):
        x = self.flatten(x)
        x = self.hidden(x)
        x = self.relu(x)
        x = self.output(x)
        return x

# Set up data loaders
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

train_dataset = datasets.MNIST(root='./data', train=True, transform=transform, download=True)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

# Initialize the model, loss function, and optimizer
model = SimpleNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Custom training loop
def train(model, dataloader, criterion, optimizer, epochs):
    model.train()
    for epoch in range(epochs):
        running_loss = 0.0
        for images, labels in dataloader:
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        print(f'Epoch {epoch+1}, Loss: {running_loss/len(dataloader)}')

# Train the model
train(model, train_loader, criterion, optimizer, epochs=5)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 21516991.14it/s]


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 631210.95it/s]


Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 5668740.21it/s]


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 3384955.36it/s]


Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw

Epoch 1, Loss: 0.3874411182696504
Epoch 2, Loss: 0.19976051347548646
Epoch 3, Loss: 0.14357210623263233
Epoch 4, Loss: 0.11548272613733848
Epoch 5, Loss: 0.09890914866144755


In [19]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, regularizers
import numpy as np

# Load and preprocess the MNIST dataset
mnist = keras.datasets.mnist
(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_train, X_test = X_train / 255.0, X_test / 255.0

# Define the model architecture with L2 regularization
l2_reg = regularizers.l2(0.05)
model = keras.models.Sequential([
    layers.Flatten(input_shape=(28, 28)),
    layers.Dense(30, activation="elu", kernel_initializer="he_normal", kernel_regularizer=l2_reg),
    layers.Dense(10, kernel_regularizer=l2_reg)
])

# Custom function to generate random batches
def random_batch(X, y, batch_size=32):
    idx = np.random.randint(len(X), size=batch_size)
    return X[idx], y[idx]

# Custom function to print training progress
def print_status_bar(iteration, total, loss, metrics=None):
    metrics = " - ".join(["{}: {:.4f}".format(m.name, m.result()) for m in [loss] + (metrics or [])])
    end = "" if iteration < total else "\n"
    print("\r{}/{} - ".format(iteration, total) + metrics, end=end)

# Training parameters
n_epochs = 5
batch_size = 32
n_steps = len(X_train) // batch_size
optimizer = keras.optimizers.Nadam(lr=0.01)
loss_fn = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
mean_loss = keras.metrics.Mean()
metrics = [keras.metrics.SparseCategoricalAccuracy()]

# Training loop
for epoch in range(1, n_epochs + 1):
    print("Epoch {}/{}".format(epoch, n_epochs))
    for step in range(1, n_steps + 1):
        X_batch, y_batch = random_batch(X_train, y_train, batch_size)
        with tf.GradientTape() as tape:
            y_pred = model(X_batch, training=True)
            main_loss = loss_fn(y_batch, y_pred)
            loss = tf.add_n([main_loss] + model.losses)

        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))

        mean_loss(loss)
        for metric in metrics:
            metric(y_batch, y_pred)

        print_status_bar(step * batch_size, len(y_train), mean_loss, metrics)

    print_status_bar(len(y_train), len(y_train), mean_loss, metrics)
    for metric in [mean_loss] + metrics:
        metric.reset_states()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz




Epoch 1/5
60000/60000 - mean: 1.6276 - sparse_categorical_accuracy: 0.8329
60000/60000 - mean: 1.6276 - sparse_categorical_accuracy: 0.8329
Epoch 2/5
60000/60000 - mean: 1.4766 - sparse_categorical_accuracy: 0.8418
60000/60000 - mean: 1.4766 - sparse_categorical_accuracy: 0.8418
Epoch 3/5
60000/60000 - mean: 1.4739 - sparse_categorical_accuracy: 0.8399
60000/60000 - mean: 1.4739 - sparse_categorical_accuracy: 0.8399
Epoch 4/5
60000/60000 - mean: 1.4678 - sparse_categorical_accuracy: 0.8386
60000/60000 - mean: 1.4678 - sparse_categorical_accuracy: 0.8386
Epoch 5/5
60000/60000 - mean: 1.4706 - sparse_categorical_accuracy: 0.8383
60000/60000 - mean: 1.4706 - sparse_categorical_accuracy: 0.8383


In [21]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, regularizers
import numpy as np

# Load and preprocess the MNIST dataset
mnist = keras.datasets.mnist
(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_train, X_test = X_train / 255.0, X_test / 255.0

# Define the model architecture with L2 regularization
l2_reg = regularizers.l2(0.05)
model = keras.models.Sequential([
    layers.Flatten(input_shape=(28, 28)),
    layers.Dense(30, activation="elu", kernel_initializer="he_normal", kernel_regularizer=l2_reg),
    layers.Dense(10, kernel_regularizer=l2_reg)
])

# Custom function to generate random batches
def random_batch(X, y, batch_size=32):
    idx = np.random.randint(len(X), size=batch_size)
    return X[idx], y[idx]

# Custom function to print training progress
def print_status_bar(iteration, total, loss, metrics=None):
    metrics = " - ".join(["{}: {:.4f}".format(m.name, m.result()) for m in [loss] + (metrics or [])])
    end = "" if iteration < total else "\n"
    print("\r{}/{} - ".format(iteration, total) + metrics, end=end)

# Training parameters
n_epochs = 5
batch_size = 32
n_steps = len(X_train) // batch_size
optimizer = keras.optimizers.Nadam(lr=0.01)
loss_fn = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
mean_loss = keras.metrics.Mean()
metrics = [keras.metrics.SparseCategoricalAccuracy()]

# Training loop
for epoch in range(1, n_epochs + 1):
    print("Epoch {}/{}".format(epoch, n_epochs))
    for step in range(1, n_steps + 1):
        X_batch, y_batch = random_batch(X_train, y_train, batch_size)
        with tf.GradientTape() as tape:
            y_pred = model(X_batch, training=True)
            main_loss = loss_fn(y_batch, y_pred)
            loss = tf.add_n([main_loss] + model.losses)

        gradients = tape.gradient(loss, model.trainable_variables)

        # Apply weight constraints if defined
        for variable in model.variables:
            if variable.constraint is not None:
                variable.assign(variable.constraint(variable))

        optimizer.apply_gradients(zip(gradients, model.trainable_variables))

        mean_loss(loss)
        for metric in metrics:
            metric(y_batch, y_pred)

        print_status_bar(step * batch_size, len(y_train), mean_loss, metrics)

    print_status_bar(len(y_train), len(y_train), mean_loss, metrics)
    for metric in [mean_loss] + metrics:
        metric.reset_states()



Epoch 1/5
60000/60000 - mean: 1.6318 - sparse_categorical_accuracy: 0.8331
60000/60000 - mean: 1.6318 - sparse_categorical_accuracy: 0.8331
Epoch 2/5
60000/60000 - mean: 1.4822 - sparse_categorical_accuracy: 0.8392
60000/60000 - mean: 1.4822 - sparse_categorical_accuracy: 0.8392
Epoch 3/5
60000/60000 - mean: 1.4721 - sparse_categorical_accuracy: 0.8381
60000/60000 - mean: 1.4721 - sparse_categorical_accuracy: 0.8381
Epoch 4/5
60000/60000 - mean: 1.4745 - sparse_categorical_accuracy: 0.8393
60000/60000 - mean: 1.4745 - sparse_categorical_accuracy: 0.8393
Epoch 5/5
60000/60000 - mean: 1.4733 - sparse_categorical_accuracy: 0.8359
60000/60000 - mean: 1.4733 - sparse_categorical_accuracy: 0.8359


# TensorFlow Functions and Graphs


In [22]:
def cube(x):
   return x ** 3

In [23]:
cube(2)

8

In [24]:
cube(tf.constant(2.0))

<tf.Tensor: shape=(), dtype=float32, numpy=8.0>

In [25]:
tf_cube = tf.function(cube)
tf_cube

<tensorflow.python.eager.polymorphic_function.polymorphic_function.Function at 0x7ca65af08760>

In [26]:
tf_cube(2)

<tf.Tensor: shape=(), dtype=int32, numpy=8>

In [27]:
tf_cube(tf.constant(2.0))

<tf.Tensor: shape=(), dtype=float32, numpy=8.0>

In [28]:
@tf.function
def tf_cube(x):
  return x ** 3

In [29]:
tf_cube.python_function(2)

8

# Autograph and Tracing and TF Function Rules


In [30]:
tf.autograph.to_code(tf_cube.python_function)

"def tf__tf_cube(x):\n    with ag__.FunctionScope('tf_cube', 'fscope', ag__.ConversionOptions(recursive=True, user_requested=True, optional_features=(), internal_convert_user_code=True)) as fscope:\n        do_return = False\n        retval_ = ag__.UndefinedReturnValue()\n        try:\n            do_return = True\n            retval_ = ag__.ld(x) ** 3\n        except:\n            do_return = False\n            raise\n        return fscope.ret(retval_, do_return)\n"

In [33]:
import tensorflow as tf

def compute_jacobian(func, x):
    """
    Compute the Jacobian matrix of a function `func` at point `x` using TensorFlow.

    Args:
        func: A callable function that takes a TensorFlow tensor `x` as input.
        x: A TensorFlow tensor representing the point at which to compute the Jacobian.

    Returns:
        jacobian: A TensorFlow tensor representing the Jacobian matrix.
    """
    with tf.GradientTape() as tape:
        tape.watch(x)
        y = func(x)
    jacobian = tape.jacobian(y, x)
    return jacobian

def compute_hessian(func, x):
    """
    Compute the Hessian matrix of a function `func` at point `x` using TensorFlow.

    Args:
        func: A callable function that takes a TensorFlow tensor `x` as input.
        x: A TensorFlow tensor representing the point at which to compute the Hessian.

    Returns:
        hessian: A TensorFlow tensor representing the Hessian matrix.
    """
    with tf.GradientTape() as tape2:
        tape2.watch(x)
        with tf.GradientTape() as tape1:
            tape1.watch(x)
            y = func(x)
        gradients = tape1.gradient(y, x)
    hessian = tape2.jacobian(gradients, x)
    return hessian

# Example usage:
# Define a function for which to compute Jacobian and Hessian
def quadratic_function(x):
    return tf.reduce_sum(x**2)

# Define the point at which to compute Jacobian and Hessian
x_value = tf.constant([1.0, 2.0])

# Compute Jacobian
jacobian_result = compute_jacobian(quadratic_function, x_value)
print("Jacobian:")
print(jacobian_result)

# Compute Hessian
hessian_result = compute_hessian(quadratic_function, x_value)
print("\nHessian:")
print(hessian_result)

# Convert functions to autograph-compatible code
jacobian_code = tf.autograph.to_code(compute_jacobian)
hessian_code = tf.autograph.to_code(compute_hessian)

print("\nAutograph-compatible code for compute_jacobian:")
print(jacobian_code)

print("\nAutograph-compatible code for compute_hessian:")
print(hessian_code)


Jacobian:
tf.Tensor([2. 4.], shape=(2,), dtype=float32)

Hessian:
tf.Tensor(
[[2. 0.]
 [0. 2.]], shape=(2, 2), dtype=float32)

Autograph-compatible code for compute_jacobian:
    def tf__compute_jacobian(func, x):
        """
Compute the Jacobian matrix of a function `func` at point `x` using TensorFlow.

Args:
    func: A callable function that takes a TensorFlow tensor `x` as input.
    x: A TensorFlow tensor representing the point at which to compute the Jacobian.

Returns:
    jacobian: A TensorFlow tensor representing the Jacobian matrix.
"""
        with ag__.FunctionScope('compute_jacobian', 'fscope', ag__.ConversionOptions(recursive=True, user_requested=True, optional_features=(), internal_convert_user_code=True)) as fscope:
            do_return = False
            retval_ = ag__.UndefinedReturnValue()
            with ag__.ld(tf).GradientTape() as tape:
                ag__.converted_call(ag__.ld(tape).watch, (ag__.ld(x),), None, fscope)
                y = ag__.converted_cal

#Loading and Preprocessing Data with TensorFlow

# The Data API

In [34]:
X = tf.range(10)

In [35]:
dataset = tf.data.Dataset.from_tensor_slices(X)

In [36]:
dataset

<_TensorSliceDataset element_spec=TensorSpec(shape=(), dtype=tf.int32, name=None)>

In [37]:
for item in dataset:
  print(item)

tf.Tensor(0, shape=(), dtype=int32)
tf.Tensor(1, shape=(), dtype=int32)
tf.Tensor(2, shape=(), dtype=int32)
tf.Tensor(3, shape=(), dtype=int32)
tf.Tensor(4, shape=(), dtype=int32)
tf.Tensor(5, shape=(), dtype=int32)
tf.Tensor(6, shape=(), dtype=int32)
tf.Tensor(7, shape=(), dtype=int32)
tf.Tensor(8, shape=(), dtype=int32)
tf.Tensor(9, shape=(), dtype=int32)


# Chaining Transformations

In [38]:
dataset = dataset.repeat(3).batch(7)
for item in dataset:
  print(item)

tf.Tensor([0 1 2 3 4 5 6], shape=(7,), dtype=int32)
tf.Tensor([7 8 9 0 1 2 3], shape=(7,), dtype=int32)
tf.Tensor([4 5 6 7 8 9 0], shape=(7,), dtype=int32)
tf.Tensor([1 2 3 4 5 6 7], shape=(7,), dtype=int32)
tf.Tensor([8 9], shape=(2,), dtype=int32)
