### In tensorflow, the 2nd derivative of a matrix determinant does not match the numerical expectation.

In [1]:
import tensorflow as tf
import numpy

DEFAULT_TENSOR_TYPE = "float64"

nwalkers=2
nparticles=3
ndim=3

In [2]:
# Create random inputs as needed, of the right shape/
def generate_inputs(nwalkers, nparticles, ndim):

    inputs = numpy.random.uniform(size=[nwalkers, nparticles, ndim])

    return inputs

In [3]:
inputs = generate_inputs(nwalkers, nparticles, ndim)

In [4]:
print(inputs.shape)

(2, 3, 3)


### What is the function doing?

The "real" case is a function that forms a matrix by applying n_particle Neural Networks to n_particle individual inputs.  Each Neural Network R operates on a single particle of arbitrary number of dimensions, though 3 is the most interesting case.  The total input is of shape $(N, N_{particles}, N_{dim})$, and we have R_{N_{particles}} neural networks.  the matrix is then formed as:

$
R_0(x_0) ~ R_0(x_1) ~  ... ~ R_0(x_n) \\
R_1(x_0) ~ R_1(x_1) ~  ... ~ R_N(x_n) \\
. \\ 
. \\ 
. \\
R_N(x_0) ~ R_N(x_1) ~  ... ~ R_N(x_n) \\
$

The total function is a scalar: the determinant of this matrix.

In this notebook, for simplicity, instead of a neural network we're just using gaussian functions with one parameter.


In [5]:
# Create a low-level function for each row:
class f:
    def __init__(self, _alpha):
        self.alpha = _alpha

    def __call__(self, this_input):
        '''
        this is computed e^-alpha[x^2 + y^2 + z^2]
        '''
        return tf.exp(- tf.reduce_sum(self.alpha * this_input**2, axis=(2)))

nets = []
for i in range(nparticles):
    val = numpy.random.random()
    a = f(val)
    nets.append(a)


In [6]:
# this applies each "network" (actually a guassian in this notebook) to each particle, and stacks them into a matrix
def compute_matrix(inputs, _nets):
    rows = [_n(inputs) for _n in _nets]
    matrix = tf.stack(rows, axis=1)
    return matrix

In [7]:
matrix = compute_matrix(inputs, nets)
print(matrix)

tf.Tensor(
[[[0.83233935 0.37374074 0.22569332]
  [0.75848975 0.22707492 0.10622208]
  [0.78409486 0.27133    0.13905074]]

 [[0.42416383 0.86898265 0.38830493]
  [0.27476374 0.80934434 0.24053442]
  [0.32088127 0.83017026 0.28543172]]], shape=(2, 3, 3), dtype=float64)


In [8]:
nets[0](inputs)

<tf.Tensor: shape=(2, 3), dtype=float64, numpy=
array([[0.83233935, 0.37374074, 0.22569332],
       [0.42416383, 0.86898265, 0.38830493]])>

In [9]:
# Here are two functions that compute scalar values from this matrix.

# The determinant, as mentioned:
detmat = lambda x : tf.reshape(tf.linalg.det(compute_matrix(x, nets)), (-1,1))

# Same as above but calling a different TF function
def logdetmat(x):
    s, ld = tf.linalg.slogdet(compute_matrix(x, nets))
    return tf.reshape( s * tf.exp(ld), (-1, 1))

In [10]:
def sub_matrix(batch_matrix, row, column):
    left = batch_matrix[:,0:row,:]
    right = batch_matrix[:,row+1:,:]
    row_removed = tf.concat((left, right), axis=1)
    top = row_removed[:,:,0:column]
    bottom = row_removed[:,:,column+1:]
    return tf.concat((top, bottom), axis=2)

def custom_determinant(_matrix):

#     print("Entering")
#     print(_matrix)
    
    # Here is a custom, maybe slower, determinant implementation.
    # It operates over the batch size
    
    # The matrix should be a size [N, m, m] where N is the batch size.
        
    assert (_matrix.shape[1] == _matrix.shape[2])
    
    # Implementing this recursively, so start with the base case:
    
    if _matrix.shape[1] == 1: 
#         print("base case!")
        return tf.reshape(_matrix, (-1))
    else:
        # Need to get the submatrixes:
        sign = 1.0
        det  = 0.0
        for i in range(_matrix.shape[1]):
            sm = sub_matrix(_matrix, row=0, column=i)
#             print("sm: ", sm)
#             print("_matrix[:,i, 0]:", _matrix[:,0,i])
            sub_det = sign *_matrix[:,0,i]*custom_determinant(sm)
#             print("sub_det: ", sub_det)
            contribution =  sub_det
#             print("contribution: ", contribution)
            det += contribution
            sign *= -1.
        
        return det
    

In [11]:
custom_determinant(matrix)

<tf.Tensor: shape=(2,), dtype=float64, numpy=array([ 2.65912086e-04, -6.36939195e-05])>

In [12]:
tf.linalg.det(matrix)

<tf.Tensor: shape=(2,), dtype=float64, numpy=array([ 2.65912086e-04, -6.36939195e-05])>

In [13]:
# The custom determinant:
detmat_custom = lambda x : tf.reshape(custom_determinant(compute_matrix(x, nets)), (-1,1))


In [14]:
detmat(inputs)

<tf.Tensor: shape=(2, 1), dtype=float64, numpy=
array([[ 2.65912086e-04],
       [-6.36939195e-05]])>

In [15]:
detmat_custom(inputs)

<tf.Tensor: shape=(2, 1), dtype=float64, numpy=
array([[ 2.65912086e-04],
       [-6.36939195e-05]])>

In [16]:
logdetmat(inputs)

<tf.Tensor: shape=(2, 1), dtype=float64, numpy=
array([[ 2.65912086e-04],
       [-6.36939195e-05]])>

#### Everything is in agreement in value for all three functions

In [17]:
custom_determinant(matrix)

<tf.Tensor: shape=(2,), dtype=float64, numpy=array([ 2.65912086e-04, -6.36939195e-05])>

In [18]:
tf.linalg.det(matrix)

<tf.Tensor: shape=(2,), dtype=float64, numpy=array([ 2.65912086e-04, -6.36939195e-05])>

In [19]:
s, ld  = tf.linalg.slogdet(matrix)
print(s*tf.exp(ld))

tf.Tensor([ 2.65912086e-04 -6.36939195e-05], shape=(2,), dtype=float64)


## Numerical differentiation

In [20]:
def numerical_derivatives(f, x, dim, part, kick_size=1e-4):
    # Get the shapes:
    nwalkers = x.shape[0]
    nparticles = x.shape[1]
    # Placeholder for a kick:
    kick = numpy.zeros(shape = x.shape)
    kick_size = 1e-4
    
    walkers = numpy.arange(nwalkers)

    if len(kick.shape) == 3:
        # Not single-particle
        kick[walkers,part, dim] += kick_size
    elif len(kick.shape) == 2:
        # single particle:
        kick[walkers, dim] += kick_size

#     print(kick)
    
    # x + dx:
    kicked_up_input = x + \
            tf.convert_to_tensor(kick, dtype=DEFAULT_TENSOR_TYPE)

#     # x + 2*dx:
#     kicked_double_up_input = x + \
#         tf.convert_to_tensor(2*kick, dtype=DEFAULT_TENSOR_TYPE)
    
    # x - dx
    kicked_down_input = x - \
        tf.convert_to_tensor(kick, dtype=DEFAULT_TENSOR_TYPE)

#     # x - 2*dx
#     kicked_double_down_input = x - \
#         tf.convert_to_tensor(2*kick, dtype=DEFAULT_TENSOR_TYPE)
    
    central_value = f(x)
    w_up = f(kicked_up_input)
    w_down = f(kicked_down_input)

    
    # Use numpy to make slicing easier
    w_prime_fd = tf.reshape((w_up - w_down) / (2*kick_size), (nwalkers,)).numpy()
    # What about the second derivative?

    # https://math.stackexchange.com/questions/3756717/finite-differences-second-derivative-as-successive-application-of-the-first-deri
    # This gives precision of O(kick**4)
#     w_prime_prime_num = -w_down_down + 16*w_down - 30* w_of_x + 16 * w_up - w_up_up
    w_prime_prime_num = w_up + w_down - 2*central_value
    w_prime_prime_fd = tf.reshape(w_prime_prime_num/ (kick_size**2), (nwalkers,)).numpy()

    return w_prime_fd, w_prime_prime_fd

In [21]:
def full_numerical_derivatives(f, ndim, nparticles, kick_size=1e-6):
    first = []
    second = []
    for dim in range(ndim):
        first.append([])
        second.append([])
        for part in range(nparticles):

            t_num_dw_dx, t_num_d2w_dx2 = numerical_derivatives(f, inputs, dim, part, kick_size)
            first[dim].append(t_num_dw_dx)
            second[dim].append(t_num_d2w_dx2)
        # At the end of the loop, the list should be length n_particles, with nwalker entries each.
        # stack and flip it
        first[-1] = numpy.stack(first[-1]).T
        second[-1] = numpy.stack(second[-1]).T

    num_dw_dx = numpy.stack(first, axis=-1)
    num_d2w_dx2 = numpy.stack(second, axis=-1)
    
    return num_dw_dx, num_d2w_dx2
    


## Tensorflow computation of derivatives of a callable:

In [22]:
def derivatives(w, inputs):

        n_walkers = inputs.shape[0]
        n_particles = inputs.shape[1]
        n_dim = inputs.shape[2]
        # Using the outer-most tape to watch the computation of the first derivative:
        with tf.GradientTape() as tape:
            # Use the inner tape to watch the computation of the wavefunction:
            tape.watch(inputs)
            with tf.GradientTape() as second_tape:
                second_tape.watch(inputs)
                w_of_x = w(inputs)
            # Get the derivative of logw_of_x with respect to inputs
            dw_dx = second_tape.gradient(w_of_x, inputs)

            
        # Get the derivative of dlogw_dx with respect to inputs (aka second derivative)

        # We have to extract the diagonal of the jacobian, which comes out with shape
        # [nwalkers, nparticles, dimension, nwalkers, nparticles, dimension]

        # The indexes represent partial derivative indexes, so,
        # d2w_dx2[i_w, n1,d1, n2, d2] represents the second derivative of the
        # wavefunction at dimension d1

        # This is the full hessian computation:
        d2w_dx2 = tape.batch_jacobian(dw_dx, inputs)
        d2w_dx2 = tf.reshape(d2w_dx2, (n_walkers, n_particles*n_dim, n_particles*n_dim))

        
        # Extract the diagonal parts:
        d2w_dx2 = tf.vectorized_map(tf.linalg.tensor_diag_part, d2w_dx2)

        d2w_dx2 = tf.reshape(d2w_dx2, (-1, n_particles, n_dim))
        
        return w_of_x, dw_dx, d2w_dx2

In [23]:
w_of_x, dw_dx, d2w_dx2 = derivatives(logdetmat, tf.convert_to_tensor(inputs))
num_dw_dx, num_d2w_dx2 = full_numerical_derivatives(logdetmat, ndim, nparticles)

# Compare with numerical derivatives

In [24]:
# Error in the first derivative using the tf determinant, should be < 1e-6 (kick_size)
num_dw_dx - dw_dx

<tf.Tensor: shape=(2, 3, 3), dtype=float64, numpy=
array([[[ 7.16275390e-12,  2.24241502e-12,  2.94030425e-12],
        [-3.08143268e-13,  2.70320875e-12,  8.63188427e-12],
        [-3.78174618e-12, -5.05935658e-12, -4.85289109e-12]],

       [[-2.93745974e-12, -7.13880268e-12, -1.72663641e-13],
        [-1.79237593e-12, -1.06337237e-12, -1.58892691e-12],
        [ 5.93439859e-12,  8.38320982e-12,  5.81487332e-13]]])>

In [25]:
# Error in the first derivative using the tf determinant, should be < 1e-6 (kick_size)
num_d2w_dx2 - d2w_dx2

<tf.Tensor: shape=(2, 3, 3), dtype=float64, numpy=
array([[[-2.12144931e-03, -1.26565997e-04, -2.24313538e-04],
        [ 7.94129972e-05,  3.33812677e-03,  5.88275189e-03],
        [ 1.25819290e-03,  2.18056172e-03,  2.01692124e-03]],

       [[ 5.33798062e-03,  8.45565487e-03,  2.21704051e-03],
        [-4.88294903e-03, -1.45538992e-03, -3.61198767e-03],
        [ 2.07485070e-02,  2.65361960e-02,  2.76021238e-03]]])>

In [26]:
# Compare the relative error in the second derivative using the custom operation:
(num_d2w_dx2 - d2w_dx2)/num_d2w_dx2

<tf.Tensor: shape=(2, 3, 3), dtype=float64, numpy=
array([[[ 11.6418252 ,   0.11927465,   0.2203331 ],
        [ -0.11550726,  -3.2609662 ,  -4.57393179],
        [-35.28364784,  -5.083045  ,  -5.6149554 ]],

       [[  4.7568459 ,   6.77625287,   2.2251186 ],
        [-26.4377604 ,  -5.23644349, -16.47281571],
        [-26.85381152, -36.19497695,  -3.08260865]]])>

### There is quite poor agreement in the second derivative!

## Derivatives with custom determinant:

In [27]:
c_w_of_x, c_dw_dx, c_d2w_dx2 = derivatives(detmat_custom, tf.convert_to_tensor(inputs))
c_num_dw_dx, c_num_d2w_dx2 = full_numerical_derivatives(detmat_custom, ndim, nparticles)

In [28]:
# Error in the first derivative using the custom determinant, should be < 1e-6 (kick_size)
c_num_dw_dx - c_dw_dx

<tf.Tensor: shape=(2, 3, 3), dtype=float64, numpy=
array([[[ 7.22725872e-12,  2.26436903e-12,  2.99505473e-12],
        [-3.18989626e-13,  2.73815345e-12,  8.62047933e-12],
        [-3.75545428e-12, -5.04580752e-12, -4.85803758e-12]],

       [[-2.99139880e-12, -7.19455745e-12, -1.86958088e-13],
        [-1.72149404e-12, -1.01614051e-12, -1.65235057e-12],
        [ 5.89765042e-12,  8.32850455e-12,  4.82140369e-13]]])>

In [29]:
# Absolute error in the second derivative using the custom determinant, should be < 1e-6 (kick_size):
c_num_d2w_dx2 - c_d2w_dx2

<tf.Tensor: shape=(2, 3, 3), dtype=float64, numpy=
array([[[ 5.23191461e-10,  7.25433989e-10,  5.42739781e-10],
        [-1.15374940e-10,  2.72116184e-10,  7.50053152e-10],
        [-4.77161054e-11, -3.80347212e-11, -1.53211839e-10]],

       [[-1.16605593e-10, -1.92003261e-09, -2.26585155e-09],
        [ 3.05527041e-09,  3.96548538e-09,  1.51798726e-09],
        [ 4.78336239e-09,  4.26325480e-09,  4.15570045e-09]]])>

In [30]:
# Compare the relative error in the second derivative using the custom operation:
(c_num_d2w_dx2 - c_d2w_dx2)/c_num_d2w_dx2

<tf.Tensor: shape=(2, 3, 3), dtype=float64, numpy=
array([[[-2.87111961e-06, -6.83642841e-07, -5.33109269e-07],
        [ 1.67814396e-07, -2.65826279e-07, -5.83178140e-07],
        [ 1.33812327e-06,  8.86617710e-08,  4.26530228e-07]],

       [[-1.03910932e-07, -1.53869089e-06, -2.27411448e-06],
        [ 1.65420018e-05,  1.42675489e-05,  6.92292176e-06],
        [-6.19091115e-06, -5.81504082e-06, -4.64110404e-06]]])>

This custom determinant is in line with expectations from a numerical approximation of the 2nd derivative!

## Derivatives with log determinant:

In [31]:
log_w_of_x, log_dw_dx, log_d2w_dx2 = derivatives(logdetmat, tf.convert_to_tensor(inputs))
log_num_dw_dx, log_num_d2w_dx2 = full_numerical_derivatives(logdetmat, ndim, nparticles)



In [32]:
# Error in the first derivative using the log determinant, should be < 1e-6 (kick_size)
log_num_dw_dx - log_dw_dx

<tf.Tensor: shape=(2, 3, 3), dtype=float64, numpy=
array([[[ 7.16275390e-12,  2.24241502e-12,  2.94030425e-12],
        [-3.08143268e-13,  2.70320875e-12,  8.63188427e-12],
        [-3.78174618e-12, -5.05935658e-12, -4.85289109e-12]],

       [[-2.93745974e-12, -7.13880268e-12, -1.72663641e-13],
        [-1.79237593e-12, -1.06337237e-12, -1.58892691e-12],
        [ 5.93439859e-12,  8.38320982e-12,  5.81487332e-13]]])>

In [33]:
# Absolute error in the second derivative using the log determinant, should be < 1e-6 (kick_size):
log_num_d2w_dx2 - log_d2w_dx2

<tf.Tensor: shape=(2, 3, 3), dtype=float64, numpy=
array([[[-2.12144931e-03, -1.26565997e-04, -2.24313538e-04],
        [ 7.94129972e-05,  3.33812677e-03,  5.88275189e-03],
        [ 1.25819290e-03,  2.18056172e-03,  2.01692124e-03]],

       [[ 5.33798062e-03,  8.45565487e-03,  2.21704051e-03],
        [-4.88294903e-03, -1.45538992e-03, -3.61198767e-03],
        [ 2.07485070e-02,  2.65361960e-02,  2.76021238e-03]]])>

In [34]:
# Compare the relative error in the second derivative using the log operation:
(log_num_d2w_dx2 - log_d2w_dx2)/log_num_d2w_dx2

<tf.Tensor: shape=(2, 3, 3), dtype=float64, numpy=
array([[[ 11.6418252 ,   0.11927465,   0.2203331 ],
        [ -0.11550726,  -3.2609662 ,  -4.57393179],
        [-35.28364784,  -5.083045  ,  -5.6149554 ]],

       [[  4.7568459 ,   6.77625287,   2.2251186 ],
        [-26.4377604 ,  -5.23644349, -16.47281571],
        [-26.85381152, -36.19497695,  -3.08260865]]])>

In [35]:
# Comparing the two tensorflow determinant techniques:
d2w_dx2 - log_d2w_dx2

<tf.Tensor: shape=(2, 3, 3), dtype=float64, numpy=
array([[[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]],

       [[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]]])>

## Summary

There is a bug in the tensorflow computation of a 2nd derivative of a determinant.  Both `tf.linalg.det` and `tf.linalg.slogdet` exhibit this bug, making me think this is a logic error.

I assume the derivative of a determinant in practice is calculated with Jacobi's Formula (https://en.wikipedia.org/wiki/Jacobi%27s_formula)

*Because the 2nd derivative using det and slogdet agree, I think there may be an error in a derivative of Jacobi's formula*