In tensorflow, the 2nd derivative of a matrix determinant does not match the numerical expectation.

In [1]:
import tensorflow as tf
import numpy

DEFAULT_TENSOR_TYPE = "float64"

nwalkers=1
nparticles=2
ndim=2

In [2]:

def generate_inputs(nwalkers, nparticles, ndim):

    inputs = numpy.random.uniform(size=[nwalkers, nparticles, ndim])

    return inputs

In [3]:
inputs = generate_inputs(nwalkers, nparticles, ndim)

In [4]:
# Create a low-level function for each row:
class f:
    def __init__(self, _alpha):
        self.alpha = _alpha

    def __call__(self, this_input):
        return tf.exp(- tf.reduce_sum(self.alpha * this_input**2, axis=(2)))

nets = []
for i in range(nparticles):
    val = numpy.random.random()
    a = f(val)
    nets.append(a)


In [5]:
def compute_matrix(inputs, _nets):
    rows = [_n(inputs) for _n in _nets]
    matrix = tf.stack(rows, axis=1)
    return matrix

In [6]:
compute_matrix(inputs, nets)

<tf.Tensor: shape=(1, 2, 2), dtype=float64, numpy=
array([[[0.73954114, 0.71817769],
        [0.67011944, 0.6445591 ]]])>

In [7]:
detmat = lambda x : tf.reshape(tf.linalg.det(compute_matrix(x, nets)), (-1,1))

In [8]:
detmat(inputs)

<tf.Tensor: shape=(1, 1), dtype=float64, numpy=array([[-0.00458686]])>

## Tensorflow computation of derivatives of a callable:

In [9]:
@tf.function
def derivatives(w, inputs):

        n_walkers = inputs.shape[0]
        n_particles = inputs.shape[1]
        n_dim = inputs.shape[2]
        # Using the outer-most tape to watch the computation of the first derivative:
        with tf.GradientTape() as tape:
            # Use the inner tape to watch the computation of the wavefunction:
            tape.watch(inputs)
            with tf.GradientTape() as second_tape:
                second_tape.watch(inputs)
                w_of_x = w(inputs)
                print("w_of_x: ", w_of_x)
            # Get the derivative of logw_of_x with respect to inputs
            dw_dx = second_tape.jacobian(w_of_x, inputs)
            print(dw_dx.shape)
            dw_dx = tf.reshape(dw_dx, (n_walkers, n_particles, n_dim))


            
        # Get the derivative of dlogw_dx with respect to inputs (aka second derivative)

        # We have to extract the diagonal of the jacobian, which comes out with shape
        # [nwalkers, nparticles, dimension, nwalkers, nparticles, dimension]

        # The indexes represent partial derivative indexes, so,
        # d2w_dx2[i_w, n1,d1, n2, d2] represents the second derivative of the
        # wavefunction at dimension d1

        # This is the full hessian computation:
        d2w_dx2 = tape.batch_jacobian(dw_dx, inputs)


        
        # Extract the diagonal parts:
#         d2w_dx2 = tf.vectorized_map(tf.linalg.tensor_diag_part, d2w_dx2)
#         print(d2w_dx2)

        # # And this contracts:
#         d2w_dx2 = tf.einsum("wpdpd->wpd",d2w_dx2)
        d2w_dx2 = tf.einsum("wpdpp->wpd",d2w_dx2)
#         print(d2w_dx2)
        #
        # print("First einsum: ", d2w_dx2[0])
        #
        # print("Method diff 0: ", d2w_dx2_t[0] - d2w_dx2[0])
        # # TODO: test that the second derivative is correct with finite differences

        return w_of_x, dw_dx, d2w_dx2

In [10]:
w_of_x, dw_dx, d2w_dx2 = derivatives(detmat, tf.convert_to_tensor(inputs))

w_of_x:  Tensor("Reshape:0", shape=(1, 1), dtype=float64)
(1, 1, 1, 2, 2)


In [11]:
print(d2w_dx2)

tf.Tensor(
[[[-0.20154997 -0.76812648]
  [-0.19842823 -0.21865345]]], shape=(1, 2, 2), dtype=float64)


In [12]:
print(inputs.shape)

(1, 2, 2)


In [13]:
w_of_x

<tf.Tensor: shape=(1, 1), dtype=float64, numpy=array([[-0.00458686]])>

In [14]:
dw_dx

<tf.Tensor: shape=(1, 2, 2), dtype=float64, numpy=
array([[[ 0.06203784,  0.1215111 ],
        [-0.1307704 , -0.02676069]]])>

In [15]:
d2w_dx2

<tf.Tensor: shape=(1, 2, 2), dtype=float64, numpy=
array([[[-0.20154997, -0.76812648],
        [-0.19842823, -0.21865345]]])>

# Compare with numerical derivatives

In [16]:
def numerical_derivatives(f, x, dim, part, kick_size=1e-4):
    # Get the shapes:
    nwalkers = x.shape[0]
    nparticles = x.shape[1]
    # Placeholder for a kick:
    kick = numpy.zeros(shape = x.shape)
    kick_size = 1e-4
    
    walkers = numpy.arange(nwalkers)

    if len(kick.shape) == 3:
        # Not single-particle
        kick[walkers,part, dim] += kick_size
    elif len(kick.shape) == 2:
        # single particle:
        kick[walkers, dim] += kick_size

#     print(kick)
    
    # x + dx:
    kicked_up_input = x + \
            tf.convert_to_tensor(kick, dtype=DEFAULT_TENSOR_TYPE)

#     # x + 2*dx:
#     kicked_double_up_input = x + \
#         tf.convert_to_tensor(2*kick, dtype=DEFAULT_TENSOR_TYPE)
    
    # x - dx
    kicked_down_input = x - \
        tf.convert_to_tensor(kick, dtype=DEFAULT_TENSOR_TYPE)

#     # x - 2*dx
#     kicked_double_down_input = x - \
#         tf.convert_to_tensor(2*kick, dtype=DEFAULT_TENSOR_TYPE)
    
    central_value = f(x)
    w_up = f(kicked_up_input)
    w_down = f(kicked_down_input)

    
    # Use numpy to make slicing easier
    w_prime_fd = tf.reshape((w_up - w_down) / (2*kick_size), (nwalkers,)).numpy()
    # What about the second derivative?

    # https://math.stackexchange.com/questions/3756717/finite-differences-second-derivative-as-successive-application-of-the-first-deri
    # This gives precision of O(kick**4)
#     w_prime_prime_num = -w_down_down + 16*w_down - 30* w_of_x + 16 * w_up - w_up_up
    w_prime_prime_num = w_up + w_down - 2*central_value
    w_prime_prime_fd = tf.reshape(w_prime_prime_num/ (kick_size**2), (nwalkers,)).numpy()

    return w_prime_fd, w_prime_prime_fd

In [17]:

first = []
second = []
for dim in range(ndim):
    first.append([])
    second.append([])
    for part in range(nparticles):

        t_num_dw_dx, t_num_d2w_dx2 = numerical_derivatives(detmat, inputs, dim, part, kick_size=1e-6)
        first[dim].append(t_num_dw_dx)
        second[dim].append(t_num_d2w_dx2)
    # At the end of the loop, the list should be length n_particles, with nwalker entries each.
    # stack and flip it
    first[-1] = numpy.stack(first[-1]).T
    second[-1] = numpy.stack(second[-1]).T

num_dw_dx = numpy.stack(first, axis=-1)
num_d2w_dx2 = numpy.stack(second, axis=-1)
print(num_dw_dx.shape)
print(num_d2w_dx2.shape)

(1, 2, 2)
(1, 2, 2)


In [18]:
print(num_dw_dx.shape)
print(num_d2w_dx2.shape)

(1, 2, 2)
(1, 2, 2)


In [19]:
print(num_d2w_dx2)

[[[ 0.13617471 -0.01824917]
  [ 0.0897577  -0.16683246]]]


In [20]:
# Check against tensorflow derivatives:

In [21]:
num_dw_dx - dw_dx

<tf.Tensor: shape=(1, 2, 2), dtype=float64, numpy=
array([[[-7.76148673e-10, -1.18106022e-09],
        [ 1.14077892e-09,  3.66977032e-10]]])>

In [22]:
num_d2w_dx2 - d2w_dx2

<tf.Tensor: shape=(1, 2, 2), dtype=float64, numpy=
array([[[0.33772468, 0.74987731],
        [0.28818593, 0.05182099]]])>

In [23]:
(num_d2w_dx2 - d2w_dx2)/num_d2w_dx2

<tf.Tensor: shape=(1, 2, 2), dtype=float64, numpy=
array([[[  2.48008372, -41.09104784],
        [  3.21070986,  -0.31061697]]])>

There is quite poor agreement in the second derivative!

## Even simpler case

Let's create a matrix of just one variable:

In [24]:
simple_inputs = tf.random.uniform((nwalkers,1), dtype=DEFAULT_TENSOR_TYPE)

In [25]:
class simple_matrix:
    
    def __init__(self):
        
        self.weights = tf.random.uniform((1,2,2), dtype=DEFAULT_TENSOR_TYPE)
        print(tf.reduce_sum(self.weights))
        
    def sm_base(self, x):
        return tf.reshape(x**3, (-1, 1,1)) * self.weights
    
    def matrix_first_deriv(self,x):
        return tf.reshape(3*x**2, (-1, 1, 1)) * self.weights
            
    
    def __call__(self, x):
        return tf.reshape(tf.linalg.det(self.sm_base(x)), (-1, 1))

In [26]:
sm = simple_matrix()

tf.Tensor(2.1436791254445833, shape=(), dtype=float64)


In [27]:
sm(simple_inputs)

<tf.Tensor: shape=(1, 1), dtype=float64, numpy=array([[0.23931646]])>

In [28]:
def simple_derivatives(w, inputs):

    
        # Using the outer-most tape to watch the computation of the first derivative:
        with tf.GradientTape() as tape:
            # Use the inner tape to watch the computation of the wavefunction:
            tape.watch(inputs)
            with tf.GradientTape() as second_tape:
                second_tape.watch(inputs)
                w_of_x = w(inputs)

            # Get the derivative of logw_of_x with respect to inputs
            dw_dx = second_tape.batch_jacobian(w_of_x, inputs)

        # Get the derivative of dlogw_dx with respect to inputs (aka second derivative)

        # We have to extract the diagonal of the jacobian, which comes out with shape
        # [nwalkers, nparticles, dimension, nwalkers, nparticles, dimension]

        # The indexes represent partial derivative indexes, so,
        # d2w_dx2[i_w, n1,d1, n2, d2] represents the second derivative of the
        # wavefunction at dimension d1

        # This is the full hessian computation:
        print("dw_dx: ", dw_dx)
        d2w_dx2 = tape.batch_jacobian(dw_dx, inputs)

        print("d2w_dx2: ", d2w_dx2)
        
        # Extract the diagonal parts:
#         d2w_dx2 = tf.linalg.tensor_diag_part(d2w_dx2)
        
#         print(tf.hessians(w_of_x, inputs))

        # # And this contracts:
        # d2w_dx2 = tf.einsum("wpdpd->wpd",d2w_dx2)
        #
        # print("First einsum: ", d2w_dx2[0])
        #
        # print("Method diff 0: ", d2w_dx2_t[0] - d2w_dx2[0])
        # # TODO: test that the second derivative is correct with finite differences

        return w_of_x, tf.reshape(dw_dx, (-1,)), tf.reshape(d2w_dx2, (-1,))

In [29]:
def simple_numerical_derivatives(f, x, kick_size=1e-5):
    # Get the shapes:
    nwalkers = x.shape[0]
    # Placeholder for a kick:
    kick = numpy.zeros(shape = x.shape) + kick_size
    
#     print(kick)
    
    # x + dx:
    kicked_up_input = x + \
            tf.convert_to_tensor(kick, dtype=DEFAULT_TENSOR_TYPE)

    # x + 2*dx:
    kicked_double_up_input = x + \
        tf.convert_to_tensor(2*kick, dtype=DEFAULT_TENSOR_TYPE)
    
    # x - dx
    kicked_down_input = x - \
        tf.convert_to_tensor(kick, dtype=DEFAULT_TENSOR_TYPE)

    # x - 2*dx
    kicked_double_down_input = x - \
        tf.convert_to_tensor(2*kick, dtype=DEFAULT_TENSOR_TYPE)
    
    central_value = f(x)
    w_up = f(kicked_up_input)
    w_down = f(kicked_down_input)
    w_up_up = f(kicked_double_up_input)
    w_down_down = f(kicked_double_down_input)

    print(w_up)
    print(w_down_down)
    
    # Use numpy to make slicing easier
    w_prime_fd = tf.reshape((w_up - w_down) / (2*kick_size), (nwalkers,)).numpy()
    # What about the second derivative?

    # https://math.stackexchange.com/questions/3756717/finite-differences-second-derivative-as-successive-application-of-the-first-deri
    # This gives precision of O(kick**4)
    w_prime_prime_num = -w_down_down + 16*w_down - 30* central_value + 16 * w_up - w_up_up
    print(w_prime_prime_num)
#     w_prime_prime_num = w_up + w_down - 2*central_value
    w_prime_prime_fd = tf.reshape(w_prime_prime_num/ (12*kick_size**2), (nwalkers,)).numpy()

    return w_prime_fd, w_prime_prime_fd

In [30]:
sm_of_x, dsm_dx, d2sm_dx2 = simple_derivatives(sm, simple_inputs)

dw_dx:  tf.Tensor([[[1.58497546]]], shape=(1, 1, 1), dtype=float64)
d2w_dx2:  tf.Tensor([[[[11.00703712]]]], shape=(1, 1, 1, 1), dtype=float64)


In [31]:
sm_of_x

<tf.Tensor: shape=(1, 1), dtype=float64, numpy=array([[0.23931646]])>

In [32]:
dsm_dx

<tf.Tensor: shape=(1,), dtype=float64, numpy=array([1.58497546])>

In [33]:
d2sm_dx2

<tf.Tensor: shape=(1,), dtype=float64, numpy=array([11.00703712])>

In [34]:
part=0; dim=0
num_dsm_dx, num_d2sm_dx2 = simple_numerical_derivatives(sm, simple_inputs,kick_size=1e-2)

tf.Tensor([[0.25561009]], shape=(1, 1), dtype=float64)
tf.Tensor([[0.20931583]], shape=(1, 1), dtype=float64)
tf.Tensor([[0.01049718]], shape=(1, 1), dtype=float64)


In [35]:
num_dsm_dx

array([1.5856192])

In [36]:
(num_dsm_dx - dsm_dx) / dsm_dx

<tf.Tensor: shape=(1,), dtype=float64, numpy=array([0.00040616])>

In [37]:
num_d2sm_dx2

array([8.74764722])

In [38]:
(num_d2sm_dx2 - d2sm_dx2)

<tf.Tensor: shape=(1,), dtype=float64, numpy=array([-2.25938989])>

In [39]:
(num_d2sm_dx2 - d2sm_dx2) / num_d2sm_dx2

<tf.Tensor: shape=(1,), dtype=float64, numpy=array([-0.25828544])>

# Analytic Derivatives

By Jacobi's formula, for a matrix A:

$ \frac{\partial det(A)}{\partial t} = det(A) tr[A^{-1} \frac{\partial A}{\partial t }] $

In [40]:
def jacobi_formula(A_callable, inputs):
    with tf.GradientTape() as tape:
        tape.watch(inputs)
        matrix = A_callable.sm_base(inputs)
    print("matrix.shape: ", matrix.shape)
    inverse = tf.linalg.inv(matrix)
    print("inverse: ", inverse)
    print(inputs.shape)
    partial = A_callable.matrix_first_deriv(inputs)
    print("partial: ", partial)
    partial = tape.batch_jacobian(matrix, inputs)
    print("partial: ", partial)
    product = tf.matmul(inverse, partial)
    print("product: ", product)
    
    return tf.linalg.det(matrix) * tf.linalg.trace(product)


In [41]:
jacobi_formula(sm, simple_inputs)

matrix.shape:  (1, 2, 2)
inverse:  tf.Tensor(
[[[ 2.21883816 -0.22379812]
  [-2.12051733  2.09710442]]], shape=(1, 2, 2), dtype=float64)
(1, 1)
partial:  tf.Tensor(
[[[1.66192952 0.17735726]
  [1.68048396 1.75840201]]], shape=(1, 2, 2), dtype=float64)
partial:  tf.Tensor(
[[[[1.66192952]
   [0.17735726]]

  [[1.68048396]
   [1.75840201]]]], shape=(1, 2, 2, 1), dtype=float64)
product:  tf.Tensor(
[[[[ 3.64786042]
   [-3.15221364]]

  [[ 3.33519487]
   [ 0.1240573 ]]]], shape=(1, 2, 2, 1), dtype=float64)


<tf.Tensor: shape=(1, 2), dtype=float64, numpy=array([[0.87299305, 0.79816704]])>

In [42]:
print(dsm_dx)

tf.Tensor([1.58497546], shape=(1,), dtype=float64)
