In tensorflow, the 2nd derivative of a matrix determinant does not match the numerical expectation.

In [1]:
import tensorflow as tf
import numpy

DEFAULT_TENSOR_TYPE = "float64"

nwalkers=1
nparticles=2
ndim=2

In [2]:

def generate_inputs(nwalkers, nparticles, ndim):

    inputs = numpy.random.uniform(size=[nwalkers, nparticles, ndim])

    return inputs

In [3]:
inputs = generate_inputs(nwalkers, nparticles, ndim)

In [4]:
# Create a low-level function for each row:
class f:
    def __init__(self, _alpha):
        self.alpha = _alpha

    def __call__(self, this_input):
        return tf.exp(- tf.reduce_sum(self.alpha * this_input**2, axis=(2)))

nets = []
for i in range(nparticles):
    val = numpy.random.random()
    a = f(val)
    nets.append(a)


In [5]:
def compute_matrix(inputs, _nets):
    rows = [_n(inputs) for _n in _nets]
    matrix = tf.stack(rows, axis=1)
    return matrix

In [6]:
compute_matrix(inputs, nets)

<tf.Tensor: shape=(1, 2, 2), dtype=float64, numpy=
array([[[0.5290295 , 0.61907671],
        [0.89510065, 0.91992681]]])>

In [7]:
detmat = lambda x : tf.reshape(tf.linalg.det(compute_matrix(x, nets)), (-1,1))

In [8]:
detmat(inputs)

<tf.Tensor: shape=(1, 1), dtype=float64, numpy=array([[-0.06746755]])>

## Tensorflow computation of derivatives of a callable:

In [12]:
@tf.function
def derivatives(w, inputs):

        n_walkers = inputs.shape[0]
        n_particles = inputs.shape[1]
        n_dim = inputs.shape[2]
        # Using the outer-most tape to watch the computation of the first derivative:
        with tf.GradientTape() as tape:
            # Use the inner tape to watch the computation of the wavefunction:
            tape.watch(inputs)
            with tf.GradientTape() as second_tape:
                second_tape.watch(inputs)
                w_of_x = w(inputs)
                print("w_of_x: ", w_of_x)
            # Get the derivative of logw_of_x with respect to inputs
            dw_dx = second_tape.jacobian(w_of_x, inputs)
            print(dw_dx.shape)
            dw_dx = tf.reshape(dw_dx, (n_walkers, n_particles, n_dim))


            
        # Get the derivative of dlogw_dx with respect to inputs (aka second derivative)

        # We have to extract the diagonal of the jacobian, which comes out with shape
        # [nwalkers, nparticles, dimension, nwalkers, nparticles, dimension]

        # The indexes represent partial derivative indexes, so,
        # d2w_dx2[i_w, n1,d1, n2, d2] represents the second derivative of the
        # wavefunction at dimension d1

        # This is the full hessian computation:
        d2w_dx2 = tape.batch_jacobian(dw_dx, inputs)


        
        # Extract the diagonal parts:
#         d2w_dx2 = tf.vectorized_map(tf.linalg.tensor_diag_part, d2w_dx2)
#         print(d2w_dx2)

        # # And this contracts:
#         d2w_dx2 = tf.einsum("wpdpd->wpd",d2w_dx2)
        d2w_dx2 = tf.einsum("wpdpp->wpd",d2w_dx2)
#         print(d2w_dx2)
        #
        # print("First einsum: ", d2w_dx2[0])
        #
        # print("Method diff 0: ", d2w_dx2_t[0] - d2w_dx2[0])
        # # TODO: test that the second derivative is correct with finite differences

        return w_of_x, dw_dx, d2w_dx2

In [13]:
w_of_x, dw_dx, d2w_dx2 = derivatives(detmat, tf.convert_to_tensor(inputs))

w_of_x:  Tensor("Reshape:0", shape=(1, 1), dtype=float64)
(1, 1, 1, 2, 2)


In [14]:
print(d2w_dx2)

tf.Tensor(
[[[-0.50746991  0.07901408]
  [-0.82868638 -1.82150049]]], shape=(1, 2, 2), dtype=float64)


In [20]:
print(inputs.shape)

(1, 2, 2)


In [21]:
w_of_x

<tf.Tensor: shape=(1, 1), dtype=float64, numpy=array([[-0.06746755]])>

In [22]:
dw_dx

<tf.Tensor: shape=(1, 2, 2), dtype=float64, numpy=
array([[[-0.12301516, -0.49675014],
        [ 0.1708377 ,  0.50621721]]])>

In [23]:
d2w_dx2

<tf.Tensor: shape=(1, 2, 2), dtype=float64, numpy=
array([[[-0.50746991,  0.07901408],
        [-0.82868638, -1.82150049]]])>

# Compare with numerical derivatives

In [24]:
def numerical_derivatives(f, x, dim, part, kick_size=1e-4):
    # Get the shapes:
    nwalkers = x.shape[0]
    nparticles = x.shape[1]
    # Placeholder for a kick:
    kick = numpy.zeros(shape = x.shape)
    kick_size = 1e-4
    
    walkers = numpy.arange(nwalkers)

    if len(kick.shape) == 3:
        # Not single-particle
        kick[walkers,part, dim] += kick_size
    elif len(kick.shape) == 2:
        # single particle:
        kick[walkers, dim] += kick_size

#     print(kick)
    
    # x + dx:
    kicked_up_input = x + \
            tf.convert_to_tensor(kick, dtype=DEFAULT_TENSOR_TYPE)

#     # x + 2*dx:
#     kicked_double_up_input = x + \
#         tf.convert_to_tensor(2*kick, dtype=DEFAULT_TENSOR_TYPE)
    
    # x - dx
    kicked_down_input = x - \
        tf.convert_to_tensor(kick, dtype=DEFAULT_TENSOR_TYPE)

#     # x - 2*dx
#     kicked_double_down_input = x - \
#         tf.convert_to_tensor(2*kick, dtype=DEFAULT_TENSOR_TYPE)
    
    central_value = f(x)
    w_up = f(kicked_up_input)
    w_down = f(kicked_down_input)

    
    # Use numpy to make slicing easier
    w_prime_fd = tf.reshape((w_up - w_down) / (2*kick_size), (nwalkers,)).numpy()
    # What about the second derivative?

    # https://math.stackexchange.com/questions/3756717/finite-differences-second-derivative-as-successive-application-of-the-first-deri
    # This gives precision of O(kick**4)
#     w_prime_prime_num = -w_down_down + 16*w_down - 30* w_of_x + 16 * w_up - w_up_up
    w_prime_prime_num = w_up + w_down - 2*central_value
    w_prime_prime_fd = tf.reshape(w_prime_prime_num/ (kick_size**2), (nwalkers,)).numpy()

    return w_prime_fd, w_prime_prime_fd

In [25]:

first = []
second = []
for dim in range(ndim):
    first.append([])
    second.append([])
    for part in range(nparticles):

        t_num_dw_dx, t_num_d2w_dx2 = numerical_derivatives(detmat, inputs, dim, part, kick_size=1e-6)
        first[dim].append(t_num_dw_dx)
        second[dim].append(t_num_d2w_dx2)
    # At the end of the loop, the list should be length n_particles, with nwalker entries each.
    # stack and flip it
    first[-1] = numpy.stack(first[-1]).T
    second[-1] = numpy.stack(second[-1]).T

num_dw_dx = numpy.stack(first, axis=-1)
num_d2w_dx2 = numpy.stack(second, axis=-1)
print(num_dw_dx.shape)
print(num_d2w_dx2.shape)

(1, 2, 2)
(1, 2, 2)


In [26]:
print(num_dw_dx.shape)
print(num_d2w_dx2.shape)

(1, 2, 2)
(1, 2, 2)


In [27]:
print(num_d2w_dx2)

[[[-0.48034052  0.23441486]
  [ 0.56258124  0.00677793]]]


In [390]:
# Check against tensorflow derivatives:

In [391]:
num_dw_dx - dw_dx

<tf.Tensor: shape=(1, 2, 2), dtype=float64, numpy=
array([[[ 2.27338068e-09,  1.13477260e-09],
        [-1.84688673e-09, -2.03367861e-09]]])>

In [291]:
num_d2w_dx2 - d2w_dx2

array([[[[[[[-0.19548536, -0.52591633, -0.63573312],
            [ 1.1818334 ,  0.77107426,  0.89058484]]],


          [[[-0.28713068, -0.0014551 , -0.45978581],
            [ 0.29752065,  0.4560978 ,  0.4358391 ]]],


          [[[-0.44206962, -0.50490796, -0.03812006],
            [ 0.64139798,  0.57858078,  0.61267331]]]],



         [[[[-0.09396909, -0.47487514, -0.37448749],
            [-0.78143362,  0.23464322,  0.11611512]]],


          [[[-0.19410837, -0.48351478, -0.41870805],
            [ 0.02230778, -0.09913718,  0.29431464]]],


          [[[-0.16952467, -0.48139379, -0.40785212],
            [-0.06276422,  0.32777074, -0.20664152]]]]]]])

In [292]:
(num_d2w_dx2 - d2w_dx2)/num_d2w_dx2

array([[[[[[[ 7.83480563e-01,  1.07704734e+00,  1.43450527e+00],
            [ 5.52203344e+00,  1.80851849e+00,  2.26669158e+00]]],


          [[[ 1.15078339e+00,  2.97995424e-03,  1.03748750e+00],
            [ 1.39014429e+00,  1.06975598e+00,  1.10928546e+00]]],


          [[[ 1.77175904e+00,  1.03402337e+00,  8.60163352e-02],
            [ 2.99688694e+00,  1.35703407e+00,  1.55935895e+00]]]],



         [[[[ 3.76616196e-01,  9.72517822e-01,  8.45015402e-01],
            [-3.65119362e+00,  5.50344659e-01,  2.95532953e-01]]],


          [[[ 7.77961785e-01,  9.90211337e-01,  9.44797245e-01],
            [ 1.04231511e-01, -2.32521602e-01,  7.49081377e-01]]],


          [[[ 6.79433401e-01,  9.85867666e-01,  9.20301289e-01],
            [-2.93261424e-01,  7.68770888e-01, -5.25938210e-01]]]]]]])

There is quite poor agreement in the second derivative!

## Even simpler case

Let's create a matrix of just one variable:

In [688]:
simple_inputs = tf.random.uniform((nwalkers,1), dtype=DEFAULT_TENSOR_TYPE)

In [689]:
class simple_matrix:
    
    def __init__(self):
        
        self.weights = tf.random.uniform((1,2,2), dtype=DEFAULT_TENSOR_TYPE)
        print(tf.reduce_sum(self.weights))
        
    def sm_base(self, x):
        return tf.reshape(x**3, (-1, 1,1)) * self.weights
    
    def matrix_first_deriv(self,x):
        return tf.reshape(3*x**2, (-1, 1, 1)) * self.weights
            
    
    def __call__(self, x):
        return tf.reshape(tf.linalg.det(self.sm_base(x)), (-1, 1))

In [690]:
sm = simple_matrix()

tf.Tensor(2.5066429626366755, shape=(), dtype=float64)


In [691]:
sm(simple_inputs)

<tf.Tensor: shape=(4, 1), dtype=float64, numpy=
array([[1.29166460e-03],
       [1.42177728e-05],
       [1.00243685e-06],
       [6.45791307e-04]])>

In [692]:
def simple_derivatives(w, inputs):

    
        # Using the outer-most tape to watch the computation of the first derivative:
        with tf.GradientTape() as tape:
            # Use the inner tape to watch the computation of the wavefunction:
            tape.watch(inputs)
            with tf.GradientTape() as second_tape:
                second_tape.watch(inputs)
                w_of_x = w(inputs)

            # Get the derivative of logw_of_x with respect to inputs
            dw_dx = second_tape.batch_jacobian(w_of_x, inputs)

        # Get the derivative of dlogw_dx with respect to inputs (aka second derivative)

        # We have to extract the diagonal of the jacobian, which comes out with shape
        # [nwalkers, nparticles, dimension, nwalkers, nparticles, dimension]

        # The indexes represent partial derivative indexes, so,
        # d2w_dx2[i_w, n1,d1, n2, d2] represents the second derivative of the
        # wavefunction at dimension d1

        # This is the full hessian computation:
        print("dw_dx: ", dw_dx)
        d2w_dx2 = tape.batch_jacobian(dw_dx, inputs)

        print("d2w_dx2: ", d2w_dx2)
        
        # Extract the diagonal parts:
#         d2w_dx2 = tf.linalg.tensor_diag_part(d2w_dx2)
        
#         print(tf.hessians(w_of_x, inputs))

        # # And this contracts:
        # d2w_dx2 = tf.einsum("wpdpd->wpd",d2w_dx2)
        #
        # print("First einsum: ", d2w_dx2[0])
        #
        # print("Method diff 0: ", d2w_dx2_t[0] - d2w_dx2[0])
        # # TODO: test that the second derivative is correct with finite differences

        return w_of_x, tf.reshape(dw_dx, (-1,)), tf.reshape(d2w_dx2, (-1,))

In [693]:
def simple_numerical_derivatives(f, x, kick_size=1e-5):
    # Get the shapes:
    nwalkers = x.shape[0]
    # Placeholder for a kick:
    kick = numpy.zeros(shape = x.shape) + kick_size
    
#     print(kick)
    
    # x + dx:
    kicked_up_input = x + \
            tf.convert_to_tensor(kick, dtype=DEFAULT_TENSOR_TYPE)

    # x + 2*dx:
    kicked_double_up_input = x + \
        tf.convert_to_tensor(2*kick, dtype=DEFAULT_TENSOR_TYPE)
    
    # x - dx
    kicked_down_input = x - \
        tf.convert_to_tensor(kick, dtype=DEFAULT_TENSOR_TYPE)

    # x - 2*dx
    kicked_double_down_input = x - \
        tf.convert_to_tensor(2*kick, dtype=DEFAULT_TENSOR_TYPE)
    
    central_value = f(x)
    w_up = f(kicked_up_input)
    w_down = f(kicked_down_input)
    w_up_up = f(kicked_double_up_input)
    w_down_down = f(kicked_double_down_input)

    print(w_up)
    print(w_down_down)
    
    # Use numpy to make slicing easier
    w_prime_fd = tf.reshape((w_up - w_down) / (2*kick_size), (nwalkers,)).numpy()
    # What about the second derivative?

    # https://math.stackexchange.com/questions/3756717/finite-differences-second-derivative-as-successive-application-of-the-first-deri
    # This gives precision of O(kick**4)
    w_prime_prime_num = -w_down_down + 16*w_down - 30* central_value + 16 * w_up - w_up_up
    print(w_prime_prime_num)
#     w_prime_prime_num = w_up + w_down - 2*central_value
    w_prime_prime_fd = tf.reshape(w_prime_prime_num/ (12*kick_size**2), (nwalkers,)).numpy()

    return w_prime_fd, w_prime_prime_fd

In [694]:
sm_of_x, dsm_dx, d2sm_dx2 = simple_derivatives(sm, simple_inputs)

dw_dx:  tf.Tensor(
[[[1.29393773e-02]]

 [[3.01982345e-04]]

 [[3.31260329e-05]]

 [[7.26159581e-03]]], shape=(4, 1, 1), dtype=float64)
d2w_dx2:  tf.Tensor(
[[[[0.12360076]]]


 [[[0.00611612]]]


 [[[0.00104382]]]


 [[[0.0778603 ]]]], shape=(4, 1, 1, 1), dtype=float64)


In [695]:
sm_of_x

<tf.Tensor: shape=(4, 1), dtype=float64, numpy=
array([[1.29166460e-03],
       [1.42177728e-05],
       [1.00243685e-06],
       [6.45791307e-04]])>

In [696]:
dsm_dx

<tf.Tensor: shape=(4,), dtype=float64, numpy=array([1.29393773e-02, 3.01982345e-04, 3.31260329e-05, 7.26159581e-03])>

In [697]:
d2sm_dx2

<tf.Tensor: shape=(4,), dtype=float64, numpy=array([0.12360076, 0.00611612, 0.00104382, 0.0778603 ])>

In [698]:
part=0; dim=0
num_dsm_dx, num_d2sm_dx2 = simple_numerical_derivatives(sm, simple_inputs,kick_size=1e-2)

tf.Tensor(
[[1.42658102e-03]
 [1.75178017e-05]
 [1.38279914e-06]
 [7.21895689e-04]], shape=(4, 1), dtype=float64)
tf.Tensor(
[[1.05354256e-03]
 [9.15142762e-06]
 [4.97683121e-07]
 [5.13506943e-04]], shape=(4, 1), dtype=float64)
tf.Tensor(
[[1.29621482e-04]
 [6.41403539e-06]
 [1.09466383e-06]
 [8.16529604e-05]], shape=(4, 1), dtype=float64)


In [699]:
num_dsm_dx

array([1.29514014e-02, 3.03244233e-04, 3.34612803e-05, 7.27009806e-03])

In [700]:
(num_dsm_dx - dsm_dx) / dsm_dx

<tf.Tensor: shape=(4,), dtype=float64, numpy=array([0.00092927, 0.00417868, 0.01012036, 0.00117085])>

In [701]:
num_d2sm_dx2

array([0.1080179 , 0.00534503, 0.00091222, 0.06804413])

In [702]:
(num_d2sm_dx2 - d2sm_dx2)

<tf.Tensor: shape=(4,), dtype=float64, numpy=array([-0.01558286, -0.00077109, -0.0001316 , -0.00981617])>

In [703]:
(num_d2sm_dx2 - d2sm_dx2) / num_d2sm_dx2

<tf.Tensor: shape=(4,), dtype=float64, numpy=array([-0.14426179, -0.14426225, -0.14426458, -0.14426181])>

# Analytic Derivatives

By Jacobi's formula, for a matrix A:

$ \frac{\partial det(A)}{\partial t} = det(A) tr[A^{-1} \frac{\partial A}{\partial t }] $

In [336]:
def jacobi_formula(A_callable, inputs):
    with tf.GradientTape() as tape:
        tape.watch(inputs)
        matrix = A_callable.sm_base(inputs)
    print("matrix.shape: ", matrix.shape)
    inverse = tf.linalg.inv(matrix)
    print("inverse: ", inverse)
    print(inputs.shape)
    partial = A_callable.matrix_first_deriv(inputs)
    print("partial: ", partial)
    partial = tape.batch_jacobian(matrix, inputs)
    print("partial: ", partial)
    product = tf.matmul(inverse, partial)
    print("product: ", product)
    
    return tf.linalg.det(matrix) * tf.linalg.trace(product)


In [337]:
jacobi_formula(sm, simple_inputs)

matrix.shape:  (4, 2, 2)
inverse:  tf.Tensor(
[[[ 3.39209789e+07 -1.96147934e+07]
  [-9.55033055e+06  1.14404444e+07]]

 [[ 5.92428478e+02 -3.42571546e+02]
  [-1.66796124e+02  1.99806882e+02]]

 [[ 2.59318320e+02 -1.49950722e+02]
  [-7.30101476e+01  8.74596463e+01]]

 [[ 1.99946036e+01 -1.15618721e+01]
  [-5.62940929e+00  6.74353034e+00]]], shape=(4, 2, 2), dtype=float64)
(4, 1)
partial:  tf.Tensor(
[[[2.90427976e-05 4.97942612e-05]
  [2.42445406e-05 8.61120507e-05]]

 [[4.31446300e-02 7.39720396e-02]
  [3.60165624e-02 1.27924059e-01]]

 [[7.48391018e-02 1.28312631e-01]
  [6.24746852e-02 2.21898338e-01]]

 [[4.13119315e-01 7.08298537e-01]
  [3.44866501e-01 1.22490098e+00]]], shape=(4, 2, 2), dtype=float64)
partial:  tf.Tensor(
[[[[2.90427976e-05]
   [4.97942612e-05]]

  [[2.42445406e-05]
   [8.61120507e-05]]]


 [[[4.31446300e-02]
   [7.39720396e-02]]

  [[3.60165624e-02]
   [1.27924059e-01]]]


 [[[7.48391018e-02]
   [1.28312631e-01]]

  [[6.24746852e-02]
   [2.21898338e-01]]]


 [[[4

InvalidArgumentError: In[0] and In[1] must have compatible batch dimensions: [4,2,2] vs. [4,2,2,1] [Op:BatchMatMulV2]

In [304]:
print(dsm_dx)

tf.Tensor([-5.82653770e-01 -1.74059201e-02 -1.19563478e-04 -4.90531300e-02], shape=(4,), dtype=float64)
