# First and second derivatives of a feed-forward neural network (FNN) with respect to its input (TensorFlow)

Import necessary packages.

In [251]:
import tensorflow as tf
import numpy as np

In [252]:
#tf.keras.backend.set_floatx('float32')

Define activation function and its derivatives.

In [253]:
# Custom activation function
# from keras.layers import Activation
# from keras import backend as K
# from keras.utils.generic_utils import get_custom_objects

#def mσ(x):
    #return np.abs(x) + np.log(1. + np.exp(-2. * np.abs(x)))
    
def mσ(x):
    """Logistic sigmoid 1 / (1 + exp(-x)), applied element-wise with TensorFlow ops."""
    denominator = 1. + tf.math.exp(tf.math.negative(x))
    return tf.math.divide(1., denominator)

# get_custom_objects().update({'custom_activation': Activation(mσ)})


In [254]:
#def mdσ(x):
    #return np.tanh(x)
    
    
#def md2σ(x):
    #return np.divide(1., np.square(np.cosh(x)))

def mdσ(x):
    """First derivative of the sigmoid mσ: σ(x) * (1 - σ(x)), element-wise."""
    # Evaluate the sigmoid once and reuse it instead of recomputing it per factor.
    s = mσ(x)
    return s * (1. - s)
    
    
def md2σ(x):
    """Second derivative of the sigmoid mσ: σ(x) * (1 - σ(x)) * (1 - 2 σ(x)), element-wise."""
    # Evaluate the sigmoid once and reuse it instead of recomputing it per factor.
    s = mσ(x)
    return s * (1. - s) * (1. - 2.*s)

In [255]:
# Shape sanity check: push a random batch through a single affine map and
# confirm that mdσ acts element-wise (its output has the shape of its input).
x = tf.random.uniform((10,4))  # 10 samples with 4 features each
W = tf.random.uniform((4,1))
b = tf.random.uniform((1,1))
z = x @ W + b  # pre-activation; b broadcasts over the batch axis
print(z.shape)
print(mdσ(z).shape)

(10, 1)
(10, 1)


In [256]:
# Compare the hand-written sigmoid with the Keras implementation on large
# inputs, and print the hand-written first and second derivatives.
x = [[10.], [20.], [30.]]

print(mσ(x))
print(tf.keras.activations.sigmoid(x))  # reference implementation
print(mdσ(x))
print(md2σ(x))

tf.Tensor(
[[0.9999546]
 [1.       ]
 [1.       ]], shape=(3, 1), dtype=float32)
tf.Tensor(
[[0.9999546]
 [1.       ]
 [1.       ]], shape=(3, 1), dtype=float32)
tf.Tensor(
[[4.5416677e-05]
 [0.0000000e+00]
 [0.0000000e+00]], shape=(3, 1), dtype=float32)
tf.Tensor(
[[-4.541255e-05]
 [-0.000000e+00]
 [-0.000000e+00]], shape=(3, 1), dtype=float32)


These values do not exactly match the results obtained in Julia.

Define Neural Network.

In [257]:
# Define model architecture
class PINN(tf.keras.Model):
    """Fully connected network with hand-written input derivatives.

    The forward pass records the input of every Dense layer so that
    get_gradient and get_gradient_and_hessian can back-propagate the first
    and second derivatives of the output with respect to the network input
    without using a GradientTape.

    NOTE: the module-level helpers used for back-propagation
    (get_gradient_layer, get_gradient_hessian_last_layer,
    get_gradient_hessian_hidden_layer) use mdσ and md2σ, i.e. the derivatives
    of the logistic sigmoid. The hand-written derivatives therefore only
    agree with the network when ``activationfunction`` is a sigmoid, which is
    the default.
    """

    def __init__(self,
                 output_dim=1,
                 num_hidden_layers=3,
                 num_neurons_per_layer=20,
                 activationfunction = 'sigmoid',
                 kernel_initializer='glorot_normal',
                 **kwargs):
        """Create the hidden Dense layers and the output Dense layer.

        Args:
            output_dim: number of units of the output layer.
            num_hidden_layers: number of hidden Dense layers.
            num_neurons_per_layer: number of units of every hidden layer.
            activationfunction: activation of all layers, including the
                output layer.
            kernel_initializer: initializer of the hidden-layer kernels.
            **kwargs: forwarded to tf.keras.Model.
        """
        super().__init__(**kwargs)

        self.num_hidden_layers = num_hidden_layers
        self.input_dim = 2
        self.output_dim = output_dim

        # Initialize num_hidden_layers fully connected Dense layers.
        self.hidden = [tf.keras.layers.Dense(num_neurons_per_layer,
                                             activation = activationfunction,
                                             kernel_initializer=kernel_initializer) for _ in range(self.num_hidden_layers)]

        # Output layer; it uses the same activation as the hidden layers.
        self.out = tf.keras.layers.Dense(output_dim, activation = activationfunction)

    def call(self, X):
        """Forward pass; also caches the input of every layer.

        After the call, ``self.tmp_layer_output[k]`` is the input of hidden
        layer k and ``self.tmp_layer_output[-1]`` is the input of the output
        layer.
        """
        self.tmp_layer_output = [X]
        Z = X

        for i in range(self.num_hidden_layers):
            Z = self.hidden[i](Z)
            self.tmp_layer_output.append(Z)

        return self.out(Z)

    def _output_seed(self):
        """Identity matrix that seeds the backward recursion at the output."""
        # Built in the dtype of the output-layer weights so the element-wise
        # product with mdσ(z) does not depend on implicit float64 -> float32
        # coercion of a NumPy constant.
        return tf.eye(self.output_dim, dtype=self.out.weights[0].dtype)

    def get_gradient(self, x):
        """Return the network output and its derivative with respect to x.

        Runs a forward pass and then walks the layers from the output back to
        the input with get_gradient_layer.

        Returns:
            (output, δ). For a (10, 2) input and the default arguments the
            recorded result has shape (2, 10): one column per sample.
        """
        output = self.call(x)
        δ = get_gradient_layer(self.out.weights[0], self.out.weights[1], self.tmp_layer_output[-1], self._output_seed())

        for k in range(self.num_hidden_layers-1, -1, -1):
            δ = get_gradient_layer(self.hidden[k].weights[0], self.hidden[k].weights[1], self.tmp_layer_output[k], δ)

        return output, δ


    def get_gradient_and_hessian(self, x):
        """Return the output, its gradient and its Hessian with respect to x.

        Returns:
            (output, δ, ϑ) where δ is the same gradient as in get_gradient
            and ϑ is the Hessian; callers in this file index it as
            ϑ[sample, i, j].
        """
        output = self.call(x)
        δ,ϑ = get_gradient_hessian_last_layer(self.out.weights[0], self.out.weights[1], self.tmp_layer_output[-1], self._output_seed())

        for k in range(self.num_hidden_layers-1, -1, -1):
            δ,ϑ = get_gradient_hessian_hidden_layer(self.hidden[k].weights[0], self.hidden[k].weights[1], self.tmp_layer_output[k], δ,  ϑ)

        return output, δ, ϑ

Compute gradient.

Compute gradient for layer l.

In [258]:
def get_gradient_layer(W,b,a,δ):
    """Back-propagate the sensitivity δ through one sigmoid Dense layer.

    Args:
        W: kernel of the layer.
        b: bias of the layer.
        a: input that was fed to the layer in the forward pass.
        δ: derivative of the network output with respect to this layer's
            output, laid out to broadcast against the transposed
            pre-activation.

    Returns:
        Derivative of the network output with respect to ``a``, computed as
        W @ (σ'(z) * δ) with z = (a @ W + b)^T.
    """
    pre_activation = tf.transpose(a @ W + b)
    scaled = mdσ(pre_activation) * δ
    return W @ scaled

Compute gradient of neural network.

In [259]:
# def get_gradient(N, x):
#     output = N(x)
#     δ = get_gradient_layer(N.out.weights[0], N.out.weights[1], N.tmp_layer_output[-1], 1.)

#     for k in range(N.num_hidden_layers-1, -1, -1):
#         δ = get_gradient_layer(N.hidden[k].weights[0], N.hidden[k].weights[1], N.tmp_layer_output[k], δ)
            
#     return output, δ

Compute gradient and Hessian of last layer.

In [260]:
def get_gradient_hessian_last_layer(W,b,a,δ):
    """Gradient and Hessian contribution of the output layer.

    Args:
        W: kernel of the output layer.
        b: bias of the output layer.
        a: input that was fed to the output layer in the forward pass.
        δ: seed of the backward recursion; it only enters the gradient.

    Returns:
        (gradient, hessian) with respect to ``a``. The Hessian is built from
        the outer product of the flattened σ''(z) with W^T, which yields one
        matrix per entry of z.
        NOTE(review): flattening z looks like it assumes a single output
        unit — confirm before using output_dim > 1.
    """
    z = tf.transpose(a @ W + b)
    gradient = W @ (mdσ(z) * δ)
    curvature = tf.reshape(md2σ(z), [-1])
    stacked = tf.tensordot(curvature, tf.transpose(W), axes=0)
    return gradient, W @ stacked

Compute gradient and Hessian of hidden layer.

In [261]:
def get_gradient_hessian_hidden_layer(W,b,a,δ,ϑ):
    """Back-propagate gradient and Hessian through one sigmoid Dense layer.

    With z = (a @ W + b)^T and D = diag(σ'(z)) per sample, the layer maps

        δ  ->  W @ (σ'(z) * δ)
        ϑ  ->  W @ diag(δ * σ''(z)) @ W^T  +  (D W^T)^T @ ϑ @ (D W^T)

    Args:
        W: kernel of the layer; expected shape (n_in, n_out).
        b: bias of the layer.
        a: input that was fed to the layer; expected shape (batch, n_in).
        δ: gradient with respect to the layer output, broadcastable against
            z of shape (n_out, batch).
        ϑ: Hessian with respect to the layer output; either one
            (n_out, n_out) matrix or a stack with a leading batch axis.

    Returns:
        (gradient, hessian) with respect to ``a``; the Hessian always has a
        leading batch axis.
    """
    z = tf.transpose(a @ W + b)
    WT = tf.transpose(W)
    dσ = mdσ(z)

    # Per-sample diag(σ'(z)) @ W^T via broadcasting:
    # (batch, n_out, 1) * (n_out, n_in) -> (batch, n_out, n_in).
    # This replaces a Python loop over the batch and does not need a
    # statically known batch size.
    dσt = tf.expand_dims(tf.transpose(dσ), axis=-1) * WT
    dσtT = tf.transpose(dσt, perm=[0, 2, 1])

    # Per-sample diag(δ * σ''(z)) @ W^T, built the same way.
    t1 = δ * md2σ(z)
    t2 = tf.expand_dims(tf.transpose(t1), axis=-1) * WT

    A = W @ t2
    B = dσtT @ ϑ @ dσt
    return W @ (dσ * δ), A + B

Compute Hessian and gradient of neural network.

In [262]:
# def get_hessian(N, x):
#     #x = tf.reshape(x, (1,2))
#     output = N(x)
#     δ,ϑ = get_gradient_hessian_last_layer(N.out.weights[0], N.out.weights[1], N.tmp_layer_output[-1], 1.)

#     for k in range(N.num_hidden_layers-1, -1, -1):
#         δ,ϑ = get_gradient_hessian_hidden_layer(N.hidden[k].weights[0], N.hidden[k].weights[1], N.tmp_layer_output[k], δ,  ϑ)
      
    
#     return output, δ, ϑ

In [263]:
# Scratch cell: scale every row of A by the matching entry of x.
x = tf.random.normal((20,))
A = tf.random.normal((20,4))

# Turn x into a column so that it broadcasts across the columns of A.
tf.expand_dims(x, axis=1) * A

<tf.Tensor: shape=(20, 4), dtype=float32, numpy=
array([[ 1.8318875e-02,  1.9722445e-02, -1.2099607e-01, -5.3044055e-02],
       [ 3.1326380e-01,  1.7344353e+00,  2.5658166e-01, -1.0187318e+00],
       [ 2.0118143e-01, -1.5569185e-01, -2.5193062e-01, -6.8841517e-01],
       [-5.1701792e-02,  8.9940103e-03,  5.3145830e-03, -4.3171961e-02],
       [-2.2841246e-01,  4.6632549e-01,  5.1627588e-01,  1.8602259e-01],
       [ 1.3212215e+00,  1.9395611e+00, -6.9541252e-01,  4.3820586e-02],
       [-1.2883713e+00, -2.9338384e+00, -1.1029279e+00,  1.5042586e+00],
       [-1.0875108e+00, -1.9109375e+00,  8.5978377e-01, -2.0167856e+00],
       [-1.6927073e+00,  2.4919282e-01, -1.1295455e+00,  1.0537713e+00],
       [-1.4724010e-02, -1.0798075e-03, -5.6281447e-02,  4.5124345e-02],
       [ 7.5288743e-01,  1.0207346e-01,  2.5509557e-01, -3.1016055e-01],
       [ 1.0784518e-01, -7.3585458e-02, -4.5844603e-02, -1.2507530e-01],
       [-1.4665782e-01,  3.4777382e-01,  1.9384292e-01,  5.7739604e-01],
  

In [268]:
# Compare the hand-written second derivative with automatic differentiation
# for a single random input point.
NeuralN = PINN()

x = tf.random.normal((1,2))

out2, δ2, ϑ = NeuralN.get_gradient_and_hessian(x)
# _fvals2_ad returns (y, yt, yx, yxx): t is the first input column and x the
# second, so the names are unpacked in that order.
out_ad, yt_ad, yx_ad, yxx_ad = _fvals2_ad(NeuralN, x[:,0], x[:,1])

print(yxx_ad)
# Entry (1, 1) of the Hessian of sample 0 is the second derivative with
# respect to the second input column.
print(ϑ[0, 1, 1])

(20, 1)
(20, 20)
(20, 1)
(20, 20)
(20, 1)
(20, 2)
tf.Tensor([-1.327027e-05], shape=(1,), dtype=float32)
tf.Tensor(-1.3270246e-05, shape=(), dtype=float32)


Why do we get a 2D vector when we insert a 2D vector?

In [270]:
# Check on a batch of 10 points that get_gradient and
# get_gradient_and_hessian return the same gradient.
NeuralN = PINN()

x = tf.random.normal((10,2))

#out = NeuralN(x)
#print(out)

out1, δ1 = NeuralN.get_gradient(x)
out2, δ2, ϑ = NeuralN.get_gradient_and_hessian(x)

print(δ1- δ2)  # difference between the two gradients
print(δ1)
#print(tf.reshape(ϑ, [-1]))

(20, 10)
(20, 20)
(20, 10)
(20, 20)
(20, 10)
(20, 2)
tf.Tensor(
[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]], shape=(2, 10), dtype=float32)
tf.Tensor(
[[ 0.00021457  0.00020477  0.00019922  0.00030564  0.00019726  0.0002298
   0.0002109   0.00026123  0.00019085  0.00028901]
 [-0.00036115 -0.0004447  -0.00035387 -0.00041372 -0.00032382 -0.0004113
  -0.00037382 -0.00039567 -0.00032715 -0.00038928]], shape=(2, 10), dtype=float32)


-> We need to choose appropriate dtypes so that no operation overflows.

In [232]:
# def _fvals1(N, x):

#     with tf.GradientTape() as g:
#         g.watch(x)
#         y = N(x)

#     dy_dx = g.gradient(y, x)
#     dy_dx = np.transpose(dy_dx.numpy())

#     return y, dy_dx

def _fvals1(N, t, x):
    """Evaluate model N at the points (t, x) and differentiate by autodiff.

    Args:
        N: callable model that takes a tensor whose columns are t and x.
        t: 1-D tensor holding the first input column.
        x: 1-D tensor holding the second input column.

    Returns:
        (y, yt, yx): the model output and its derivatives with respect to
        t and x.
    """
    with tf.GradientTape(persistent=True) as tape:
        tape.watch([t, x])
        y = N(tf.stack([t, x], axis=1))

    yt, yx = tape.gradient(y, [t, x])
    return y, yt, yx

In [233]:

# For 10 freshly initialized networks and random points, print the distance
# between the hand-written gradients and the autodiff gradients.
for i in range(10):
    x = tf.random.uniform((1,2))
    NeuralN = PINN()
    out1, δ1 = NeuralN.get_gradient(x)
    out2, δ2, ϑ = NeuralN.get_gradient_and_hessian(x)
    # _fvals1 treats column 0 as t and column 1 as x, so row 0 of δ is
    # compared with yt and row 1 with yx.
    out_ad, yt_ad, yx_ad = _fvals1(NeuralN, x[:,0], x[:,1])
    print(np.linalg.norm(δ1[1]-yx_ad))
    print(np.linalg.norm(δ1[0]-yt_ad))
    print(np.linalg.norm(δ2[1]-yx_ad))
    print(np.linalg.norm(δ2[0]-yt_ad))

(20, 1)
(20, 20)
(20, 1)
(20, 20)
(20, 1)
(20, 2)
3.4924597e-10
2.3283064e-10
3.4924597e-10
2.3283064e-10
(20, 1)
(20, 20)
(20, 1)
(20, 20)
(20, 1)
(20, 2)
2.910383e-10
1.1641532e-10
2.910383e-10
1.1641532e-10
(20, 1)
(20, 20)
(20, 1)
(20, 20)
(20, 1)
(20, 2)
1.1641532e-10
1.1641532e-10
1.1641532e-10
1.1641532e-10
(20, 1)
(20, 20)
(20, 1)
(20, 20)
(20, 1)
(20, 2)
2.4738256e-10
0.0
2.4738256e-10
0.0
(20, 1)
(20, 20)
(20, 1)
(20, 20)
(20, 1)
(20, 2)
1.4551915e-11
5.820766e-11
1.4551915e-11
5.820766e-11
(20, 1)
(20, 20)
(20, 1)
(20, 20)
(20, 1)
(20, 2)
1.1641532e-10
3.4924597e-10
1.1641532e-10
3.4924597e-10
(20, 1)
(20, 20)
(20, 1)
(20, 20)
(20, 1)
(20, 2)
3.4924597e-10
1.3096724e-10
3.4924597e-10
1.3096724e-10
(20, 1)
(20, 20)
(20, 1)
(20, 20)
(20, 1)
(20, 2)
5.820766e-11
1.1641532e-10
5.820766e-11
1.1641532e-10
(20, 1)
(20, 20)
(20, 1)
(20, 20)
(20, 1)
(20, 2)
2.3283064e-10
2.3283064e-10
2.3283064e-10
2.3283064e-10
(20, 1)
(20, 20)
(20, 1)
(20, 20)
(20, 1)
(20, 2)
1.1641532e-10
1.164153

In [234]:
# def _fvals2(N, x):

#     with tf.GradientTape(persistent=True) as h:
#         h.watch(x)
#         with tf.GradientTape() as g:
#             g.watch(x)
#             y = N(x)

#         dy_dx = g.gradient(y, x)
    
#     d2y_d2x = h.jacobian(dy_dx, x)

#     return y, dy_dx, d2y_d2x

def _fvals2_ad(N, t, x):
    """Evaluate model N at (t, x) with first and second autodiff derivatives.

    Args:
        N: callable model that takes a tensor whose columns are t and x.
        t: 1-D tensor holding the first input column.
        x: 1-D tensor holding the second input column.

    Returns:
        (y, yt, yx, yxx): the model output, its first derivatives with
        respect to t and x, and its second derivative with respect to x.
    """
    with tf.GradientTape(persistent=True) as tape:
        tape.watch([t, x])
        y = N(tf.stack([t, x], axis=1))
        # Taken inside the tape context so that the tape records yx and can
        # differentiate it once more below.
        yx = tape.gradient(y, x)

    yxx = tape.gradient(yx, x)
    yt = tape.gradient(y, t)

    return y, yt, yx, yxx

In [269]:
# For 10 freshly initialized networks and random points, print the distance
# between the hand-written and the autodiff second derivative with respect
# to the second input column.
for i in range(10):
    x = tf.random.normal((1,2))
    NeuralN = PINN()
    out,δ,ϑ = NeuralN.get_gradient_and_hessian(x)
    # _fvals2_ad returns (y, yt, yx, yxx); unpack in that order.
    out_ad, yt_ad, yx_ad, yxx_ad = _fvals2_ad(NeuralN, x[:,0], x[:,1])
    print(np.linalg.norm(ϑ[0, 1, 1]-yxx_ad))

(20, 1)
(20, 20)
(20, 1)
(20, 20)
(20, 1)
(20, 2)
5.820766e-11
(20, 1)
(20, 20)
(20, 1)
(20, 20)
(20, 1)
(20, 2)
5.820766e-11
(20, 1)
(20, 20)
(20, 1)
(20, 20)
(20, 1)
(20, 2)
1.6370905e-11
(20, 1)
(20, 20)
(20, 1)
(20, 20)
(20, 1)
(20, 2)
4.3655746e-11
(20, 1)
(20, 20)
(20, 1)
(20, 20)
(20, 1)
(20, 2)
7.275958e-12
(20, 1)
(20, 20)
(20, 1)
(20, 20)
(20, 1)
(20, 2)
4.3655746e-11
(20, 1)
(20, 20)
(20, 1)
(20, 20)
(20, 1)
(20, 2)
1.4551915e-11
(20, 1)
(20, 20)
(20, 1)
(20, 20)
(20, 1)
(20, 2)
7.275958e-12
(20, 1)
(20, 20)
(20, 1)
(20, 20)
(20, 1)
(20, 2)
2.910383e-11
(20, 1)
(20, 20)
(20, 1)
(20, 20)
(20, 1)
(20, 2)
2.5465852e-11


Maybe gradient tape thinks that the neural network is not differentiable?

# Explicit derivatives of ResNet

Here we only approximate the "half" gradient so far. 

In [277]:
class PINN_ResNet(tf.keras.Model):
    """Residual network plus a quadratic form, with hand-written derivatives.

    The forward pass evaluates

        Phi(x) = wb(N(x)) + 0.5 * <x, A(x)> + c(x),

    where N is built from Dense layers: the first one is applied directly
    and every further one as a residual update
    N <- N + ResNetStepsize * layer(N). ``wb`` is a Dense layer with one
    unit, and ``A`` and ``c`` are bias-free Dense layers.

    NOTE: the module-level derivative helpers use mdσ and md2σ, i.e. the
    derivatives of the logistic sigmoid, so the hand-written derivatives only
    agree with the network when ``ResNetActivation`` is a sigmoid (the
    default).
    NOTE(review): get_gradient and get_gradient_and_hessian treat the last
    layer as a residual update, which call() only does when
    ResNetLayers >= 2 — confirm before using a single-layer configuration.
    """

    def __init__(self,
                 ResNetLayers=2,
                 ResNetNeurons=16,
                 ResNetStepsize=1.0,
                 ResNetActivation='sigmoid',
                 **kwargs):
        """Create the layers.

        Args:
            ResNetLayers: number of Dense layers in the residual stack.
            ResNetNeurons: number of units of every layer in the stack.
            ResNetStepsize: factor applied to every residual update.
            ResNetActivation: activation of the layers in the stack.
            **kwargs: forwarded to tf.keras.Model.
        """
        super().__init__(**kwargs)

        self.ResNetLayers = ResNetLayers
        self.ResNetStepsize = ResNetStepsize

        self.ResNet = [tf.keras.layers.Dense(ResNetNeurons,
                                        activation = ResNetActivation) for _ in range(self.ResNetLayers)]
        self.wb = tf.keras.layers.Dense(1)
        self.A = tf.keras.layers.Dense(2, use_bias=False)
        self.c = tf.keras.layers.Dense(1, use_bias=False)

        self.input_dim = 2
        self.output_dim = 1


    def call(self, input_tensor, training=False):
        """Forward pass; also caches the input of every layer of the stack.

        After the call, ``self.tmp_layer_output[k]`` is the input of
        ``self.ResNet[k]``.
        """
        self.tmp_layer_output = [input_tensor]

        N = self.ResNet[0](input_tensor, training=training)

        for i in range(1, self.ResNetLayers):
            self.tmp_layer_output.append(N)
            N = N + self.ResNetStepsize * self.ResNet[i](N, training=training)

        Phi = self.wb(N, training=training)

        # Quadratic term 0.5 * <x, A(x)> and linear term c(x).
        As = self.A(input_tensor, training=training)
        sAs = tf.keras.layers.Dot(axes=(1))([input_tensor, As])
        Phi += .5 * sAs
        Phi += self.c(input_tensor, training=training)

        return Phi

    def get_gradient(self, x):
        """Return the network output and its derivative with respect to x.

        Returns:
            (output, gradient). For a (1, 2) input the recorded gradient has
            shape (2, 1); for a batch there is one column per sample.
        """
        output = self.call(x)
        δ = get_gradient_layer(self.ResNet[-1].weights[0], self.ResNet[-1].weights[1], self.tmp_layer_output[-1], self.wb.weights[0])

        # Residual update: identity path plus the scaled layer path.
        δ = self.wb.weights[0] + self.ResNetStepsize * δ

        for k in range(self.ResNetLayers-2, 0, -1):
            δ = δ + self.ResNetStepsize * get_gradient_layer(self.ResNet[k].weights[0], self.ResNet[k].weights[1], self.tmp_layer_output[k], δ)


        # The first layer is applied without a residual connection.
        δ = get_gradient_layer(self.ResNet[0].weights[0], self.ResNet[0].weights[1], self.tmp_layer_output[0], δ)

        M = self.A.weights[0]

        # Add the derivatives of the quadratic term and of the linear term.
        return output, δ + 0.5*tf.transpose(x @ (M + tf.transpose(M))) + self.c.weights[0]


    def get_gradient_and_hessian(self, x):
        """Return output, gradient and Hessian for a single input point.

        x is reshaped to (1, 2), so exactly one sample is processed per call.

        Returns:
            (output, gradient, hessian) with respect to x.
        """
        x = tf.reshape(x, (1,2))
        output = self.call(x)
        δ,ϑ,z = get_gradient_hessian_layer_ResNet(self.ResNet[-1].weights[0], self.ResNet[-1].weights[1], self.tmp_layer_output[-1], self.wb.weights[0])

        δ = self.wb.weights[0] + self.ResNetStepsize * δ

        for k in range(self.ResNetLayers-2, 0, -1):
            δ_new, ϑ_new_1, z = get_gradient_hessian_layer_ResNet(self.ResNet[k].weights[0], self.ResNet[k].weights[1], self.tmp_layer_output[k], δ)
            t = ϑ + self.ResNetStepsize * self.ResNet[k].weights[0] @ ( mdσ(z) * ϑ)
            ϑ_new_2 = tf.transpose(t) + self.ResNetStepsize * self.ResNet[k].weights[0] @ ( mdσ(z) * tf.transpose(t))
            ϑ = ϑ_new_1 + ϑ_new_2
            δ = δ + self.ResNetStepsize * δ_new


        # The first layer is applied without a residual connection.
        δ, ϑ = get_gradient_hessian_hidden_layer(self.ResNet[0].weights[0], self.ResNet[0].weights[1], self.tmp_layer_output[0], δ, ϑ)

        M = self.A.weights[0]

        # Add the first and second derivatives of the quadratic and linear terms.
        return output, δ + 0.5*tf.transpose(x @ (M + tf.transpose(M))) + self.c.weights[0], ϑ + 0.5*(M + tf.transpose(M))

Gradient of the model that approximates the solution of the PDE.

In [278]:
# def get_gradient_ResNet(R,x):
#     output = R(x)
#     δ = get_gradient_layer(R.ResNet[-1].weights[0], R.ResNet[-1].weights[1], R.tmp_layer_output[-1], R.wb.weights[0])

#     δ = R.wb.weights[0] + R.ResNetStepsize * δ
 
#     for k in range(R.ResNetLayers-2, 0, -1):
#         δ = δ + R.ResNetStepsize * get_gradient_layer(R.ResNet[k].weights[0], R.ResNet[k].weights[1], R.tmp_layer_output[k], δ)
          
    
#     δ = get_gradient_layer(R.ResNet[0].weights[0], R.ResNet[0].weights[1], R.tmp_layer_output[0], δ)
    
#     M = R.A.weights[0]
    
#     return output, δ + 0.5*tf.transpose(x @ (M + tf.transpose(M))) + R.c.weights[0]

Something is wrong with the 'whole' gradient?

In [279]:
# Compare the hand-written ResNet gradient with automatic differentiation at
# one fixed input point.
Resnet = PINN_ResNet()

x = tf.constant([[1., 10.]])

out, δ = Resnet.get_gradient(x)

print(δ)

# _fvals1 returns (y, yt, yx): t is the first input column and x the second,
# so the names are unpacked in that order.
out_ad, yt_ad, yx_ad = _fvals1(Resnet, x[:,0], x[:,1])

print(yt_ad)
print(yx_ad)

# Row 0 of δ is the derivative with respect to the first input column.
print(np.linalg.norm(δ[0] - yt_ad))

(16, 1)
tf.Tensor(
[[-0.44613445]
 [ 8.789103  ]], shape=(2, 1), dtype=float32)
tf.Tensor([-0.4461347], shape=(1,), dtype=float32)
tf.Tensor([8.789103], shape=(1,), dtype=float32)
2.3841858e-07


In [280]:
# For 10 freshly initialized ResNets and random points, print the distance
# between the hand-written and the autodiff gradient, one component at a time.
for i in range(10):
    x = tf.random.normal((1,2))
    Resnet = PINN_ResNet()
    out, δ = Resnet.get_gradient(x)
    # _fvals1 returns (y, yt, yx) with t = column 0 and x = column 1.
    out_ad, yt_ad, yx_ad = _fvals1(Resnet, x[:,0], x[:,1])
    print(np.linalg.norm(δ[0] - yt_ad))
    print(np.linalg.norm(δ[1] - yx_ad))

(16, 1)
0.0
1.1920929e-07
(16, 1)
0.0
0.0
(16, 1)
0.0
5.9604645e-08
(16, 1)
0.0
2.3841858e-07
(16, 1)
2.9802322e-08
0.0
(16, 1)
7.450581e-09
0.0
(16, 1)
2.2351742e-08
5.9604645e-08
(16, 1)
0.0
1.4901161e-08
(16, 1)
0.0
2.2351742e-08
(16, 1)
0.0
1.1920929e-07


In [281]:
# Same gradient comparison as before, now on a batch of 19 points.
x = tf.random.normal((19,2))
Resnet = PINN_ResNet()
out, δ = Resnet.get_gradient(x)
# _fvals1 returns (y, yt, yx) with t = column 0 and x = column 1.
out_ad, yt_ad, yx_ad = _fvals1(Resnet, x[:,0], x[:,1])
print(np.linalg.norm(δ[0,:]-yt_ad))
print(np.linalg.norm(δ[1,:]-yx_ad))

(16, 19)
3.8656955e-07
6.891787e-08


In [97]:
def get_gradient_hessian_layer_ResNet(W,b,a,δ):
    """Gradient and Hessian contribution of one sigmoid layer of the ResNet.

    Args:
        W: kernel of the layer.
        b: bias of the layer.
        a: input that was fed to the layer in the forward pass.
        δ: gradient with respect to the layer output, broadcastable against
            the transposed pre-activation.

    Returns:
        (gradient, hessian, z): W @ (σ'(z) * δ),
        W @ ((σ''(z) * δ) * W^T) and the transposed pre-activation
        z = (a @ W + b)^T, which the caller reuses.
    """
    z = tf.transpose(a @ W + b)
    gradient = W @ (mdσ(z) * δ)
    curvature = md2σ(z) * δ
    hessian = W @ (curvature * tf.transpose(W))
    return gradient, hessian, z

In [98]:
# def get_hessian_ResNet(R,x):
#     x = tf.reshape(x, (1,2))
#     output = R(x)
#     δ,ϑ,z = get_gradient_hessian_layer_ResNet(R.ResNet[-1].weights[0], R.ResNet[-1].weights[1], R.tmp_layer_output[-1], R.wb.weights[0])

#     δ = R.wb.weights[0] + R.ResNetStepsize * δ
 
#     for k in range(R.ResNetLayers-2, 0, -1):
#         δ_new, ϑ_new_1, z = get_gradient_hessian_layer_ResNet(R.ResNet[k].weights[0], R.ResNet[k].weights[1], R.tmp_layer_output[k], δ)
#         t = ϑ + R.ResNetStepsize * R.ResNet[k].weights[0] @ ( mdσ(z) * ϑ)
#         ϑ_new_2 = tf.transpose(t) + R.ResNetStepsize * R.ResNet[k].weights[0] @ ( mdσ(z) * tf.transpose(t))
#         ϑ = ϑ_new_1 + ϑ_new_2
#         δ = δ + R.ResNetStepsize * δ_new
    
      
#     δ, ϑ = get_gradient_hessian_hidden_layer(R.ResNet[0].weights[0], R.ResNet[0].weights[1], R.tmp_layer_output[0], δ, ϑ)
    
#     M = R.A.weights[0]
    
#     return output, δ + 0.5*tf.transpose(x @ (M + tf.transpose(M))) + R.c.weights[0], ϑ + 0.5*(M + tf.transpose(M))

In [99]:
# For 10 freshly initialized ResNets and random points, print the distance
# between the hand-written and the autodiff second derivative with respect
# to the second input column.
tf.random.set_seed(0)
for i in range(10):
    x = tf.random.normal((1,2))
    Resnet = PINN_ResNet()
    out, δ, ϑ = Resnet.get_gradient_and_hessian(x)
    # _fvals2_ad returns (y, yt, yx, yxx); unpack in that order.
    out_ad, yt_ad, yx_ad, yxx_ad = _fvals2_ad(Resnet, x[:,0], x[:,1])
    # get_gradient_hessian_hidden_layer returns the Hessian with a leading
    # batch axis, so the single sample has to be selected first.
    print(np.linalg.norm(ϑ[0, 1, 1]-yxx_ad))

0.0
0.0
0.0
0.0
0.0
0.0
0.0
1.4901161e-08
0.0
0.0


In [100]:
# Evaluate the single-sample derivatives on a batch with tf.vectorized_map
# and compare them with automatic differentiation.
x = tf.random.normal((10,2))
Resnet = PINN_ResNet()
result = tf.vectorized_map(Resnet.get_gradient_and_hessian, x)
# _fvals2_ad returns (y, yt, yx, yxx) with t = column 0 and x = column 1.
out, yt_ad, yx_ad, yxx_ad = _fvals2_ad(Resnet, x[:,0], x[:,1])
# Every mapped call returns a Hessian with its own leading axis of size 1,
# so that axis is indexed with 0 before picking entry (1, 1).
print(np.linalg.norm(result[2][:,0,1,1] - yxx_ad))
print(np.linalg.norm(tf.reshape(result[1][:,1], [-1]) - tf.concat(yx_ad, 0)))
print(np.linalg.norm(tf.reshape(result[1][:,0], [-1]) - tf.concat(yt_ad, 0)))

5.9604645e-08
1.6323405e-07
1.4901161e-07
