In tensorflow, the 2nd derivative of a matrix determinant does not match the numerical expectation.

In [53]:
import tensorflow as tf
import numpy

DEFAULT_TENSOR_TYPE = "float64"

nwalkers=2
nparticles=2
ndim=1

In [85]:

def generate_inputs(nwalkers, nparticles, ndim):

    inputs = numpy.random.uniform(size=[nwalkers, nparticles, ndim])

    return inputs

In [86]:
inputs = generate_inputs(nwalkers, nparticles, ndim)

In [87]:
# Create a low-level function for each row:
class f:
    def __init__(self, _alpha):
        self.alpha = _alpha

    def __call__(self, this_input):
        '''
        this is computed e^-alpha[x^2 + y^2 + z^2]
        '''
        return tf.exp(- tf.reduce_sum(self.alpha * this_input**2, axis=(2)))

nets = []
for i in range(nparticles):
    val = numpy.random.random()
    a = f(val)
    nets.append(a)


In [88]:
def compute_matrix(inputs, _nets):
    rows = [_n(inputs) for _n in _nets]
    matrix = tf.stack(rows, axis=1)
    return matrix

In [89]:
compute_matrix(inputs, nets)

<tf.Tensor: shape=(2, 2, 2), dtype=float64, numpy=
array([[[0.69038104, 0.98185776],
        [0.63837003, 0.97806487]],

       [[0.87714892, 0.98081428],
        [0.85317696, 0.97680581]]])>

In [90]:
detmat = lambda x : tf.reshape(tf.linalg.det(compute_matrix(x, nets)), (-1,1))

In [91]:
detmat(inputs)

<tf.Tensor: shape=(2, 1), dtype=float64, numpy=
array([[0.04844887],
       [0.01999602]])>

## Numerical differentiation

In [92]:
def numerical_derivatives(f, x, dim, part, kick_size=1e-4):
    # Get the shapes:
    nwalkers = x.shape[0]
    nparticles = x.shape[1]
    # Placeholder for a kick:
    kick = numpy.zeros(shape = x.shape)
    kick_size = 1e-4
    
    walkers = numpy.arange(nwalkers)

    if len(kick.shape) == 3:
        # Not single-particle
        kick[walkers,part, dim] += kick_size
    elif len(kick.shape) == 2:
        # single particle:
        kick[walkers, dim] += kick_size

#     print(kick)
    
    # x + dx:
    kicked_up_input = x + \
            tf.convert_to_tensor(kick, dtype=DEFAULT_TENSOR_TYPE)

#     # x + 2*dx:
#     kicked_double_up_input = x + \
#         tf.convert_to_tensor(2*kick, dtype=DEFAULT_TENSOR_TYPE)
    
    # x - dx
    kicked_down_input = x - \
        tf.convert_to_tensor(kick, dtype=DEFAULT_TENSOR_TYPE)

#     # x - 2*dx
#     kicked_double_down_input = x - \
#         tf.convert_to_tensor(2*kick, dtype=DEFAULT_TENSOR_TYPE)
    
    central_value = f(x)
    w_up = f(kicked_up_input)
    w_down = f(kicked_down_input)

    
    # Use numpy to make slicing easier
    w_prime_fd = tf.reshape((w_up - w_down) / (2*kick_size), (nwalkers,)).numpy()
    # What about the second derivative?

    # https://math.stackexchange.com/questions/3756717/finite-differences-second-derivative-as-successive-application-of-the-first-deri
    # This gives precision of O(kick**4)
#     w_prime_prime_num = -w_down_down + 16*w_down - 30* w_of_x + 16 * w_up - w_up_up
    w_prime_prime_num = w_up + w_down - 2*central_value
    w_prime_prime_fd = tf.reshape(w_prime_prime_num/ (kick_size**2), (nwalkers,)).numpy()

    return w_prime_fd, w_prime_prime_fd

In [93]:

first = []
second = []
for dim in range(ndim):
    first.append([])
    second.append([])
    for part in range(nparticles):

        t_num_dw_dx, t_num_d2w_dx2 = numerical_derivatives(detmat, inputs, dim, part, kick_size=1e-6)
        first[dim].append(t_num_dw_dx)
        second[dim].append(t_num_d2w_dx2)
    # At the end of the loop, the list should be length n_particles, with nwalker entries each.
    # stack and flip it
    first[-1] = numpy.stack(first[-1]).T
    second[-1] = numpy.stack(second[-1]).T

num_dw_dx = numpy.stack(first, axis=-1)
num_d2w_dx2 = numpy.stack(second, axis=-1)
print(num_dw_dx.shape)
print(num_d2w_dx2.shape)

(2, 2, 1)
(2, 2, 1)


## Tensorflow computation of derivatives of a callable:

In [94]:
def derivatives(w, inputs):

        n_walkers = inputs.shape[0]
        n_particles = inputs.shape[1]
        n_dim = inputs.shape[2]
        # Using the outer-most tape to watch the computation of the first derivative:
        with tf.GradientTape() as tape:
            # Use the inner tape to watch the computation of the wavefunction:
            tape.watch(inputs)
            with tf.GradientTape() as second_tape:
                second_tape.watch(inputs)
                w_of_x = w(inputs)
                print("w_of_x: ", w_of_x)
            # Get the derivative of logw_of_x with respect to inputs
            dw_dx = second_tape.gradient(w_of_x, inputs)
#             dw_dx = second_tape.batch_jacobian(w_of_x, inputs)
            print(dw_dx)
            dw_dx = tf.reshape(dw_dx, (n_walkers, n_particles, n_dim))


            
        # Get the derivative of dlogw_dx with respect to inputs (aka second derivative)

        # We have to extract the diagonal of the jacobian, which comes out with shape
        # [nwalkers, nparticles, dimension, nwalkers, nparticles, dimension]

        # The indexes represent partial derivative indexes, so,
        # d2w_dx2[i_w, n1,d1, n2, d2] represents the second derivative of the
        # wavefunction at dimension d1

        # This is the full hessian computation:
        d2w_dx2 = tape.batch_jacobian(dw_dx, inputs)
        print(d2w_dx2)

        
        # Extract the diagonal parts:
        d2w_dx2 = tf.vectorized_map(tf.linalg.tensor_diag_part, d2w_dx2)
        print(d2w_dx2)

        # # And this contracts:
#         d2w_dx2 = tf.einsum("wpdpd->wpd",d2w_dx2)
#         d2w_dx2 = tf.einsum("wpdpp->wpd",d2w_dx2)
#         print(d2w_dx2)
        #
        # print("First einsum: ", d2w_dx2[0])
        #
        # print("Method diff 0: ", d2w_dx2_t[0] - d2w_dx2[0])
        # # TODO: test that the second derivative is correct with finite differences

        return w_of_x, dw_dx, d2w_dx2

In [95]:
w_of_x, dw_dx, d2w_dx2 = derivatives(detmat, tf.convert_to_tensor(inputs))

w_of_x:  tf.Tensor(
[[0.04844887]
 [0.01999602]], shape=(2, 1), dtype=float64)
tf.Tensor(
[[[ 0.07607763]
  [-0.03846837]]

 [[ 0.08446972]
  [-0.04162483]]], shape=(2, 2, 1), dtype=float64)
tf.Tensor(
[[[[[-0.21982436]
    [ 0.04240996]]]


  [[[-0.04193168]
    [-0.18177932]]]]



 [[[[ 0.05629544]
    [ 0.00619567]]]


  [[[-0.03219107]
    [-0.19160706]]]]], shape=(2, 2, 1, 2, 1), dtype=float64)
tf.Tensor(
[[[-0.21982436]
  [-0.18177932]]

 [[ 0.05629544]
  [-0.19160706]]], shape=(2, 2, 1), dtype=float64)


In [96]:
print(d2w_dx2)

tf.Tensor(
[[[-0.21982436]
  [-0.18177932]]

 [[ 0.05629544]
  [-0.19160706]]], shape=(2, 2, 1), dtype=float64)


In [97]:
print(num_d2w_dx2)

[[[-0.1074315 ]
  [-0.19663074]]

 [[ 0.06588023]
  [-0.20433577]]]


# Compare with numerical derivatives

In [98]:
print(num_d2w_dx2)

[[[-0.1074315 ]
  [-0.19663074]]

 [[ 0.06588023]
  [-0.20433577]]]


In [99]:
# Check against tensorflow derivatives:

In [100]:
num_dw_dx - dw_dx

<tf.Tensor: shape=(2, 2, 1), dtype=float64, numpy=
array([[[-6.81367129e-10],
        [ 3.97317346e-10]],

       [[-9.40860945e-10],
        [ 4.70982919e-10]]])>

In [101]:
num_d2w_dx2 - d2w_dx2

<tf.Tensor: shape=(2, 2, 1), dtype=float64, numpy=
array([[[ 0.11239286],
        [-0.01485143]],

       [[ 0.00958479],
        [-0.01272871]]])>

In [102]:
(num_d2w_dx2 - d2w_dx2)/num_d2w_dx2

<tf.Tensor: shape=(2, 2, 1), dtype=float64, numpy=
array([[[-1.04618157],
        [ 0.07552952]],

       [[ 0.14548808],
        [ 0.06229312]]])>

There is quite poor agreement in the second derivative!

## Even simpler case

Let's create a matrix of just one variable:

In [25]:
simple_inputs = tf.random.uniform((nwalkers,1), dtype=DEFAULT_TENSOR_TYPE)

In [26]:
class simple_matrix:
    
    def __init__(self):
        
        self.weights = tf.random.uniform((1,2,2), dtype=DEFAULT_TENSOR_TYPE)
        print(tf.reduce_sum(self.weights))
        
    def sm_base(self, x):
        return tf.reshape(x**3, (-1, 1,1)) * self.weights
    
    def matrix_first_deriv(self,x):
        return tf.reshape(3*x**2, (-1, 1, 1)) * self.weights
            
    
    def __call__(self, x):
        return tf.reshape(tf.linalg.det(self.sm_base(x)), (-1, 1))

In [27]:
sm = simple_matrix()

tf.Tensor(1.2871075918646473, shape=(), dtype=float64)


In [28]:
sm(simple_inputs)

<tf.Tensor: shape=(2, 1), dtype=float64, numpy=
array([[3.12273893e-02],
       [8.53440729e-10]])>

In [29]:
def simple_derivatives(w, inputs):

    
        # Using the outer-most tape to watch the computation of the first derivative:
        with tf.GradientTape() as tape:
            # Use the inner tape to watch the computation of the wavefunction:
            tape.watch(inputs)
            with tf.GradientTape() as second_tape:
                second_tape.watch(inputs)
                w_of_x = w(inputs)

            # Get the derivative of logw_of_x with respect to inputs
            dw_dx = second_tape.batch_jacobian(w_of_x, inputs)

        # Get the derivative of dlogw_dx with respect to inputs (aka second derivative)

        # We have to extract the diagonal of the jacobian, which comes out with shape
        # [nwalkers, nparticles, dimension, nwalkers, nparticles, dimension]

        # The indexes represent partial derivative indexes, so,
        # d2w_dx2[i_w, n1,d1, n2, d2] represents the second derivative of the
        # wavefunction at dimension d1

        # This is the full hessian computation:
        print("dw_dx: ", dw_dx)
        d2w_dx2 = tape.batch_jacobian(dw_dx, inputs)

        print("d2w_dx2: ", d2w_dx2)
        
        # Extract the diagonal parts:
#         d2w_dx2 = tf.linalg.tensor_diag_part(d2w_dx2)
        
#         print(tf.hessians(w_of_x, inputs))

        # # And this contracts:
        # d2w_dx2 = tf.einsum("wpdpd->wpd",d2w_dx2)
        #
        # print("First einsum: ", d2w_dx2[0])
        #
        # print("Method diff 0: ", d2w_dx2_t[0] - d2w_dx2[0])
        # # TODO: test that the second derivative is correct with finite differences

        return w_of_x, tf.reshape(dw_dx, (-1,)), tf.reshape(d2w_dx2, (-1,))

In [30]:
def simple_numerical_derivatives(f, x, kick_size=1e-5):
    # Get the shapes:
    nwalkers = x.shape[0]
    # Placeholder for a kick:
    kick = numpy.zeros(shape = x.shape) + kick_size
    
#     print(kick)
    
    # x + dx:
    kicked_up_input = x + \
            tf.convert_to_tensor(kick, dtype=DEFAULT_TENSOR_TYPE)

    # x + 2*dx:
    kicked_double_up_input = x + \
        tf.convert_to_tensor(2*kick, dtype=DEFAULT_TENSOR_TYPE)
    
    # x - dx
    kicked_down_input = x - \
        tf.convert_to_tensor(kick, dtype=DEFAULT_TENSOR_TYPE)

    # x - 2*dx
    kicked_double_down_input = x - \
        tf.convert_to_tensor(2*kick, dtype=DEFAULT_TENSOR_TYPE)
    
    central_value = f(x)
    w_up = f(kicked_up_input)
    w_down = f(kicked_down_input)
    w_up_up = f(kicked_double_up_input)
    w_down_down = f(kicked_double_down_input)

    print(w_up)
    print(w_down_down)
    
    # Use numpy to make slicing easier
    w_prime_fd = tf.reshape((w_up - w_down) / (2*kick_size), (nwalkers,)).numpy()
    # What about the second derivative?

    # https://math.stackexchange.com/questions/3756717/finite-differences-second-derivative-as-successive-application-of-the-first-deri
    # This gives precision of O(kick**4)
    w_prime_prime_num = -w_down_down + 16*w_down - 30* central_value + 16 * w_up - w_up_up
    print(w_prime_prime_num)
#     w_prime_prime_num = w_up + w_down - 2*central_value
    w_prime_prime_fd = tf.reshape(w_prime_prime_num/ (12*kick_size**2), (nwalkers,)).numpy()

    return w_prime_fd, w_prime_prime_fd

In [31]:
sm_of_x, dsm_dx, d2sm_dx2 = simple_derivatives(sm, simple_inputs)

dw_dx:  tf.Tensor(
[[[2.28819143e-01]]

 [[1.13943729e-07]]], shape=(2, 1, 1), dtype=float64)
d2w_dx2:  tf.Tensor(
[[[[1.41672353e+00]]]


 [[[1.28541560e-05]]]], shape=(2, 1, 1, 1), dtype=float64)


In [32]:
sm_of_x

<tf.Tensor: shape=(2, 1), dtype=float64, numpy=
array([[3.12273893e-02],
       [8.53440729e-10]])>

In [33]:
dsm_dx

<tf.Tensor: shape=(2,), dtype=float64, numpy=array([2.28819143e-01, 1.13943729e-07])>

In [34]:
d2sm_dx2

<tf.Tensor: shape=(2,), dtype=float64, numpy=array([1.41672353e+00, 1.28541560e-05])>

In [35]:
part=0; dim=0
num_dsm_dx, num_d2sm_dx2 = simple_numerical_derivatives(sm, simple_inputs,kick_size=1e-2)

tf.Tensor(
[[3.35865902e-02]
 [2.84908670e-09]], shape=(2, 1), dtype=float64)
tf.Tensor(
[[2.69215168e-02]
 [2.49321101e-11]], shape=(2, 1), dtype=float64)
tf.Tensor(
[[1.67667555e-03]
 [1.52027959e-08]], shape=(2, 1), dtype=float64)


In [36]:
num_dsm_dx

array([2.28932906e-01, 1.33029271e-07])

In [37]:
(num_dsm_dx - dsm_dx) / dsm_dx

<tf.Tensor: shape=(2,), dtype=float64, numpy=array([0.00049717, 0.16749972])>

In [38]:
num_d2sm_dx2

array([1.39722962e+00, 1.26689966e-05])

In [39]:
(num_d2sm_dx2 - d2sm_dx2)

<tf.Tensor: shape=(2,), dtype=float64, numpy=array([-1.94939106e-02, -1.85159411e-07])>

In [40]:
(num_d2sm_dx2 - d2sm_dx2) / num_d2sm_dx2

<tf.Tensor: shape=(2,), dtype=float64, numpy=array([-0.01395183, -0.01461516])>

# Analytic Derivatives

By Jacobi's formula, for a matrix A:

$ \frac{\partial det(A)}{\partial t} = det(A) tr[A^{-1} \frac{\partial A}{\partial t }] $

In [41]:
def jacobi_formula(A_callable, inputs):
    with tf.GradientTape() as tape:
        tape.watch(inputs)
        matrix = A_callable.sm_base(inputs)
    print("matrix.shape: ", matrix.shape)
    inverse = tf.linalg.inv(matrix)
    print("inverse: ", inverse)
    print(inputs.shape)
    partial = A_callable.matrix_first_deriv(inputs)
    print("partial: ", partial)
    partial = tape.batch_jacobian(matrix, inputs)
    print("partial: ", partial)
    product = tf.matmul(inverse, partial)
    print("product: ", product)
    
    return tf.linalg.det(matrix) * tf.linalg.trace(product)


In [42]:
jacobi_formula(sm, simple_inputs)

matrix.shape:  (2, 2, 2)
inverse:  tf.Tensor(
[[[ 3.56109784e+00 -2.82391986e+00]
  [-4.04427724e+00  1.21995764e+01]]

 [[ 2.15409604e+04 -1.70817957e+04]
  [-2.44636962e+04  7.37948248e+04]]], shape=(2, 2, 2), dtype=float64)
(2, 1)
partial:  tf.Tensor(
[[[1.39574830e+00 3.23083461e-01]
  [4.62704027e-01 4.07423678e-01]]

 [[4.20422875e-03 9.73181750e-04]
  [1.39374239e-03 1.22722867e-03]]], shape=(2, 2, 2), dtype=float64)
partial:  tf.Tensor(
[[[[1.39574830e+00]
   [3.23083461e-01]]

  [[4.62704027e-01]
   [4.07423678e-01]]]


 [[[4.20422875e-03]
   [9.73181750e-04]]

  [[1.39374239e-03]
   [1.22722867e-03]]]], shape=(2, 2, 2, 1), dtype=float64)
product:  tf.Tensor(
[[[[ 4.05803446e+00]
   [-1.70331175e+00]]

  [[ 3.00756106e+03]
   [ 1.87463081e+04]]]


 [[[ 1.22234826e-02]
   [-5.13066160e-03]]

  [[ 9.05928001e+00]
   [ 5.64670346e+01]]]], shape=(2, 2, 2, 1), dtype=float64)


<tf.Tensor: shape=(2, 2), dtype=float64, numpy=
array([[1.26721822e-01, 2.56677511e-06],
       [3.81707451e-04, 7.73155853e-09]])>

In [43]:
print(dsm_dx)

tf.Tensor([2.28819143e-01 1.13943729e-07], shape=(2,), dtype=float64)
