A bunch of code, almost all from ChatGPT, attempting to find functional approaches of obtaining a batch hessian

In [38]:
import tensorflow as tf

# Create a simple neural network model with 1 input and 2 outputs
model = tf.keras.Sequential([
    tf.keras.layers.Dense(2, input_shape=(1,), activation='linear')  # No activation to keep it simple
])

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Generate some dummy data
input_data = tf.constant([[1.0], [2.0], [3.0]])  # Shape (3, 1) - A batch of 3 samples

# Use GradientTape to monitor the input for gradient computation
with tf.GradientTape(persistent=True) as tape:
    tape.watch(input_data)
    
    # Forward pass
    output_data = model(input_data)

# Compute the batch Jacobian
batch_jacobian = tape.batch_jacobian(output_data, input_data)

# Clean up resources of GradientTape
del tape

# Print the batch Jacobian
print("Batch Jacobian:", batch_jacobian.numpy())


Batch Jacobian: [[[1.219303]
  [0.34945 ]]

 [[1.219303]
  [0.34945 ]]

 [[1.219303]
  [0.34945 ]]]


In [39]:
import tensorflow as tf

# Create a simple neural network model with 1 input and 2 outputs
model = tf.keras.Sequential([
    tf.keras.layers.Dense(2, input_shape=(1,), activation='linear')
])

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Generate some dummy data
input_data = tf.constant([[1.0], [2.0], [3.0]])  # Shape (3, 1) - A batch of 3 samples

def compute_batch_jacobian(tape, input_data, output_data):
    # Compute the batch Jacobian using the provided GradientTape
    batch_jacobian = tape.batch_jacobian(output_data, input_data)
    
    return batch_jacobian

# Forward pass to get the output data
with tf.GradientTape(persistent=True) as tape:
    tape.watch(input_data)
    output_data = model(input_data)

# Call the function to compute the batch Jacobian
batch_jacobian = compute_batch_jacobian(tape, input_data, output_data)

# Clean up resources of GradientTape
del tape

# Print the batch Jacobian
print("Batch Jacobian:", batch_jacobian.numpy())


Batch Jacobian: [[[-0.9224357]
  [-1.0878837]]

 [[-0.9224357]
  [-1.0878837]]

 [[-0.9224357]
  [-1.0878837]]]


In [40]:
import tensorflow as tf

# Create a simple neural network model with 1 input and 2 outputs
model = tf.keras.Sequential([
    tf.keras.layers.Dense(2, input_shape=(1,), activation='linear')
])

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Generate some dummy data
input_data = tf.constant([[1.0], [2.0], [3.0]], dtype=tf.float32)  # Shape (3, 1) - A batch of 3 samples

# First and Second Derivative calculation
with tf.GradientTape(persistent=True) as outer_tape:
    outer_tape.watch(input_data)
    
    with tf.GradientTape(persistent=True) as inner_tape:
        inner_tape.watch(input_data)
        
        # Forward pass
        output_data = model(input_data)

    # First derivative (Jacobian)
    jacobian = outer_tape.batch_jacobian(output_data, input_data)
    
    # Compute gradients (first derivatives) with respect to input
    gradients = inner_tape.gradient(output_data, input_data)  # shape will be (3, 1)

# Check if gradients are None
if gradients is not None:
    gradients = tf.reshape(gradients, [-1, 1, 1])  # reshape to (3, 1, 1) to match batch jacobian shape
    
    # Second derivative
    with tf.GradientTape(persistent=True) as second_tape:
        second_tape.watch(gradients)
        
        # "Fake" forward pass to enable second derivative computation
        dummy_output = gradients * gradients  # just square it, shape will be (3, 1, 1)
        
    # Compute second derivatives
    second_derivatives = second_tape.batch_jacobian(dummy_output, gradients)  # should be of shape (3, 1, 1, 1)
    
    del second_tape
else:
    print("First derivatives could not be computed.")
    second_derivatives = None

# Clean up resources of GradientTape
del outer_tape
del inner_tape

# Print the batch Jacobian and second derivatives
print("Batch Jacobian:", jacobian.numpy())
if second_derivatives is not None:
    print("Second Derivatives:", second_derivatives.numpy())
else:
    print("Second Derivatives could not be computed.")


Batch Jacobian: [[[-0.9105931 ]
  [ 0.13474047]]

 [[-0.9105931 ]
  [ 0.13474047]]

 [[-0.9105931 ]
  [ 0.13474047]]]
Second Derivatives: [[[[[-1.5517052]]]]



 [[[[-1.5517052]]]]



 [[[[-1.5517052]]]]]


In [41]:
import tensorflow as tf

@tf.function
def compute_gradients(input_data, model):
    with tf.GradientTape(persistent=True) as tape1:
        tape1.watch(input_data)
        with tf.GradientTape(persistent=True) as tape2:
            tape2.watch(input_data)
            output_data = model(input_data)
        
        # First-order gradients (Jacobian)
        grads1 = tape2.gradient(output_data, input_data)
    
    # Second-order gradients
    grads2 = tape1.gradient(grads1, input_data)
    
    del tape1
    del tape2
    
    return output_data, grads1, grads2

# Create a simple neural network model with 1 input and 2 outputs
model = tf.keras.Sequential([
    tf.keras.layers.Dense(2, input_shape=(1,), activation='linear')
])

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Generate some dummy data
input_data = tf.constant([[1.0], [2.0], [3.0]], dtype=tf.float32)  # Shape (3, 1) - A batch of 3 samples

# Compute gradients
output_data, grads1, grads2 = compute_gradients(input_data, model)

print("Output Data:", output_data.numpy())
print("First-order gradients (Jacobian):", grads1.numpy())
print("Second-order gradients:", grads2.numpy())


Output Data: [[-1.0177648  0.7162975]
 [-2.0355296  1.432595 ]
 [-3.0532944  2.1488924]]
First-order gradients (Jacobian): [[-0.3014673]
 [-0.3014673]
 [-0.3014673]]


AttributeError: 'NoneType' object has no attribute 'numpy'

In [44]:
import tensorflow as tf

# Define a simple function with two outputs and one input
def my_function(x):
    return x ** 2, x ** 3

# Create an input tensor
x = tf.constant([[1.0, 2.0, 3.0]], dtype=tf.float32)

# Initialize outer GradientTape
with tf.GradientTape(persistent=True) as outer_tape:
    outer_tape.watch(x)
    
    # Initialize inner GradientTape
    with tf.GradientTape(persistent=True) as inner_tape:
        inner_tape.watch(x)
        
        # Compute function values
        y1, y2 = my_function(x)
    
    # Compute first derivatives (Jacobians)
    jacobian_y1 = inner_tape.batch_jacobian(y1, x)
    jacobian_y2 = inner_tape.batch_jacobian(y2, x)
    
# Compute second derivatives (Hessians)
hessian_y1 = outer_tape.batch_jacobian(jacobian_y1, x)
hessian_y2 = outer_tape.batch_jacobian(jacobian_y2, x)

# Delete tapes to free resources
del inner_tape
del outer_tape

print("Jacobian for y1:", jacobian_y1)
print("Jacobian for y2:", jacobian_y2)
print("Hessian for y1:", hessian_y1)
print("Hessian for y2:", hessian_y2)


2023-10-06 10:47:30.450842: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/gradient_tape/mul_1/pfor/Mul_grad/Reshape/pfor/concat/loop_body/PartitionedCall/pfor/Reshape' with dtype int32 and shape [1]
	 [[{{node gradients/gradient_tape/mul_1/pfor/Mul_grad/Reshape/pfor/concat/loop_body/PartitionedCall/pfor/Reshape}}]]
2023-10-06 10:47:30.460909: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/gradient_tape/mul_1/pfor/Mul_grad/Reshape/pfor/concat/loop_body/PartitionedCall/pfor/Reshape' with dtype int32 and shape [1]
	 [[{{node gradients/gradient_tape/mul_1/pfor/Mul_grad/Reshape/pfor/concat/loop_body/P

Jacobian for y1: tf.Tensor(
[[[2. 0. 0.]
  [0. 4. 0.]
  [0. 0. 6.]]], shape=(1, 3, 3), dtype=float32)
Jacobian for y2: tf.Tensor(
[[[ 3.  0.  0.]
  [ 0. 12.  0.]
  [ 0.  0. 27.]]], shape=(1, 3, 3), dtype=float32)
Hessian for y1: tf.Tensor(
[[[[2. 0. 0.]
   [0. 0. 0.]
   [0. 0. 0.]]

  [[0. 0. 0.]
   [0. 2. 0.]
   [0. 0. 0.]]

  [[0. 0. 0.]
   [0. 0. 0.]
   [0. 0. 2.]]]], shape=(1, 3, 3, 3), dtype=float32)
Hessian for y2: tf.Tensor(
[[[[ 6.  0.  0.]
   [ 0.  0.  0.]
   [ 0.  0.  0.]]

  [[ 0.  0.  0.]
   [ 0. 12.  0.]
   [ 0.  0.  0.]]

  [[ 0.  0.  0.]
   [ 0.  0.  0.]
   [ 0.  0. 18.]]]], shape=(1, 3, 3, 3), dtype=float32)


In [45]:
import tensorflow as tf

# Define a simple function with q=2 outputs and one input
# f(x) = [x^2, x^3]
def my_function(x):
    return tf.stack([x ** 2, x ** 3], axis=-1)

# Create an input tensor (batch size N=3)
x = tf.constant([[1.0, 2.0, 3.0]], dtype=tf.float32)

# Initialize the GradientTape
with tf.GradientTape(persistent=True) as tape:
    tape.watch(x)
    y = my_function(x)  # Shape: [N, q]

# Compute the Jacobian (first derivatives)
jacobian = tape.batch_jacobian(y, x)  # Shape: [N, q]

# Initialize a tensor to store the second derivatives
second_derivatives = tf.TensorArray(tf.float32, size=0, dynamic_size=True)

# Loop to compute the second derivatives for each output
for i in range(y.shape[-1]):
    with tf.GradientTape() as inner_tape:
        inner_tape.watch(x)
        y_i = y[:, i]
    gradient_i = inner_tape.gradient(y_i, x)
    hessian_i = tape.gradient(gradient_i, x)
    second_derivatives = second_derivatives.write(i, hessian_i)

# Stack the second derivatives to form a [N, q] matrix
second_derivatives_matrix = tf.transpose(second_derivatives.stack())

# Delete the tape to free resources
del tape

jacobian, second_derivatives_matrix


TypeError: Argument `target` should be a list or nested structure of Tensors, Variables or CompositeTensors to be differentiated, but received None.