# First and second derivative of FNN with respect to input

Import necessary packages.

In [104]:
import tensorflow as tf
import numpy as np

Define activation function and its derivatives.

In [105]:
def mσ(x):
    return np.abs(x) + np.log(1. + np.exp(-2. * np.abs(x)))
        
        
def mdσ(x):
    return np.tanh(x)
    
    
def md2σ(x):
    return np.divide(1., np.square(np.cosh(x)))

In [106]:
x = [[10], [20], [30]]

print(mσ(x))
print(mdσ(x))
print(md2σ(x))

[[10.]
 [20.]
 [30.]]
[[1.]
 [1.]
 [1.]]
[[8.24461446e-09]
 [1.69934170e-17]
 [3.50260431e-26]]


Does not exactly match the results/values in Julia.

Define Neural Network.

In [107]:
# Define model architecture
class PINN(tf.keras.Model):
    """ Set basic architecture of the PINN model."""

    def __init__(self,
                 output_dim=1,
                 num_hidden_layers=4,
                 num_neurons_per_layer=20,
                 activation= mσ,
                 kernel_initializer='glorot_normal',
                 **kwargs):
        
        super().__init__(**kwargs)

        self.num_hidden_layers = num_hidden_layers
        self.input_dim = 2
        self.output_dim = output_dim

        # Define NN architecture
        
        # Inititialize num_hidden_layers many fully connected dense layers
        self.hidden = [tf.keras.layers.Dense(num_neurons_per_layer,
                                             activation = activation,
                                             kernel_initializer=kernel_initializer) for _ in range(self.num_hidden_layers)]
        
        # Output layer
        #self.out = tf.keras.layers.Dense(output_dim, activation=None)
        self.out = tf.keras.layers.Dense(output_dim, activation = activation)
        
    def call(self, X):
        """Forward-pass through neural network."""
        self.tmp_layer_output = []
        #Z = self.scale(X)
        Z = X
        self.tmp_layer_output.append(Z)
        
        for i in range(self.num_hidden_layers):
            Z = self.hidden[i](Z)
            self.tmp_layer_output.append(Z)
            
        return self.out(Z)

Compute gradient.

Compute gradient for layer l.

In [173]:
def getGradientLayer(W,b,a,δ):
    z = np.transpose(a @ W)  
    b = np.reshape(b, np.shape(z))
    z = z + b
    
    #return W @ np.diag(mdσ(z) * δ)
    # return np.diag(mdσ(z) * δ)
    return mdσ(z)

Compute gradient of neural network.

In [109]:
def getGradient(N):
    δ = getGradientLayer(N.out.get_weights()[0], N.out.get_weights()[1], N.tmp_layer_output[-1], np.identity(N.output_dim))

    for k in range(N.num_hidden_layers-1, -1, -1):
        δ = getGradientLayer(N.hidden[k].get_weights()[0], N.hidden[k].get_weights()[1], N.tmp_layer_output[k], δ)
            
    return δ

Compute gradient and Hessian of last layer.

In [110]:
def getGradientHessianLayer(W,b,a,δ):
    z = np.transpose(a @ W)  
    b = np.reshape(b, np.shape(z))
    z = z + b
    ϑ = np.diag(md2σ(z).flatten('F'))
    
    return W @ np.diag(mdσ(z) * δ), W @ ϑ @ np.transpose(W)

Compute gradient and Hessian of hidden layer.

In [111]:
def getGradientHessianLayer_hidden(W,b,a,δ,ϑ):
    z = np.transpose(a @ W) 
    b = np.reshape(b, np.shape(z))
    z = z + b
    
    δh = np.reshape(δ, np.shape(md2σ(z)))
    t2 = δh * md2σ(z)
    H1 = W @ np.diag(t2.flatten('F')) @ np.transpose(W)

    dσt = np.diag(mdσ(z).flatten('F'))
    t3 = dσt @ ϑ @ dσt
    H2 = W @ t3 @ np.transpose(W)
    
    return W @ np.diag(mdσ(z) * δ), H1+H2

Compute Hessian and gradient of neural network.

In [112]:
def getHessian(N):
    δ,ϑ = getGradientHessianLayer(N.out.get_weights()[0], N.out.get_weights()[1], N.tmp_layer_output[-1], np.identity(N.output_dim))

    for k in range(N.num_hidden_layers-1, -1, -1):
        δ,ϑ = getGradientHessianLayer_hidden(N.hidden[k].get_weights()[0], N.hidden[k].get_weights()[1], N.tmp_layer_output[k], δ,  ϑ)
            
    return δ,ϑ

Why do we get a 2D vector when we insert a 2D vector?

In [113]:
N = PINN()
x = tf.Variable([[0.15, 0.1]])
#x = tf.constant([[1.]])
out = N(x)
print(out)
#print(N.tmp_layer_output[2])
#print(N.hidden[0].get_weights()[0] * x + N.hidden[0].get_weights()[1])
#print(mσ(N.hidden[0].get_weights()[0]))
#print(np.shape(N.out.get_weights()[0]))
#print(np.shape(N.hidden[-1].get_weights()[0]))
#print(N.hidden[1].get_weights()[0])
δ1 = getGradient(N)
δ2,ϑ = getHessian(N)
#print(len(N.hidden))
#print(N.num_hidden_layers)
print(δ1 - δ2)
print(δ1)
print(ϑ)

[[1.0392839]]
[0. 0.]
[ 0.00301194 -0.003458  ]
[[ 0.02138047 -0.00110186]
 [-0.00110186 -0.03299938]]


In [114]:
N.out.get_weights()

[array([[-0.3314287 ],
        [ 0.45498294],
        [ 0.2148701 ],
        [-0.42193145],
        [-0.5176827 ],
        [-0.31287473],
        [ 0.3848989 ],
        [ 0.4254127 ],
        [ 0.324243  ],
        [-0.2770318 ],
        [-0.422874  ],
        [ 0.18879676],
        [-0.00179446],
        [-0.38508803],
        [ 0.25300795],
        [-0.44779593],
        [ 0.33125246],
        [-0.02288008],
        [-0.53343976],
        [-0.20876151]], dtype=float32),
 array([0.], dtype=float32)]

-> We need to choose appropriate dtypes so that no operation overflows.

In [115]:
def _fvals1(N, x):

    with tf.GradientTape() as g:
        g.watch(x)
        y = N(x)

    dy_dx = g.jacobian(y, x)

    return y, dy_dx

In [117]:
#X = tf.random.normal((10,2))
#out = N(X[0,:])
#print(out)
#print(X)
#print(getGradient(N))
#print(_fvals1(N, X))
print(_fvals1(N, x)[1])

None


In [118]:
def _fvals2(N, x):

    with tf.GradientTape(persistent=True) as h:
        h.watch(x)
        with tf.GradientTape() as g:
            g.watch(x)
            y = N(x)

        dy_dx = g.jacobian(y, x)
    
    d2y_d2x = h.gradient(dy_dx, x)

    return y, dy_dx, d2y_d2x

In [119]:
print(_fvals2(N, x))

TypeError: Target should be a list or nested structure of Tensors or Variables to be differentiated, but recieved None

Maybe gradient tape thinks that the neural network is not differentiable?

# Explicit derivatives of ResNet

In [167]:
class PINN_ResNet(tf.keras.Model):
    """ Set basic architecture of the PINN model."""

    def __init__(self,
                 ResNetLayers=2,
                 ResNetNeurons=16,
                 ResNetStepsize=1.0,
                 ResNetActivation='softplus',
                 **kwargs):
        
        super(PINN_ResNet, self).__init__(**kwargs)
        
        #RNact = tf.keras.activations.get(ResNetActivation)
        #RNact = my_act
        RNact = mσ
        

        
        self.ResNetLayers = ResNetLayers
        self.ResNetStepsize = ResNetStepsize

        self.ResNet = [tf.keras.layers.Dense(ResNetNeurons,
                                        activation = RNact) for _ in range(self.ResNetLayers)]
        self.wb = tf.keras.layers.Dense(1)
        self.A = tf.keras.layers.Dense(2, use_bias=False)
        self.c = tf.keras.layers.Dense(1, use_bias=False)
        
        #self.num_hidden_layers = num_hidden_layers
        self.input_dim = 2
        self.output_dim = 1


        # Define NN architecture
        
        # Output layer
        #self.out = tf.keras.layers.Dense(1, activation='sigmoid')

        
    def call(self, input_tensor, training=False):
        """Forward-pass through neural network."""
        self.tmp_layer_output = [input_tensor]
        N = self.ResNet[0](input_tensor, training=training)
        self.tmp_layer_output.append(N)
        for i in range(1, self.ResNetLayers):
            N = N + self.ResNetStepsize * self.ResNet[i](N, training=training)
            self.tmp_layer_output.append(N)
        Phi = self.wb(N, training=training)
        #print(input_tensor)
        As = self.A(input_tensor, training=training)
        #print(As)
        sAs = tf.keras.layers.Dot(axes=(1))([input_tensor, As])
        #print(sAs)
        Phi += .5 * sAs
        #print(Phi.shape)
        Phi += self.c(input_tensor, training=training)
            
        return Phi
        #return self.out(Phi)

In [168]:
Resnet = PINN_ResNet()

out = Resnet(x)
#print(out)
#print(Resnet.tmp_layer_output.append)
print(Resnet.wb.get_weights()[0].shape)
print(Resnet.ResNet[-1].get_weights()[0].shape)

(16, 1)
(16, 16)


In [172]:
getGradientLayer(Resnet.ResNet[-1].get_weights()[0], Resnet.ResNet[-1].get_weights()[1], Resnet.tmp_layer_output[-1], Resnet.wb.get_weights()[0])
   

array([[ 0.91068345],
       [ 2.4387455 ],
       [ 2.3148098 ],
       [ 0.30477303],
       [-0.5590408 ],
       [ 2.088062  ],
       [ 1.1690454 ],
       [-1.5823517 ],
       [-1.3888068 ],
       [ 0.21986805],
       [ 0.36832523],
       [-2.0059075 ],
       [-2.500835  ],
       [ 2.5938003 ],
       [-0.80139565],
       [-1.2713152 ]], dtype=float32)

In [170]:
Resnet.wb.get_weights()[0]

array([[-0.00221908],
       [ 0.3507337 ],
       [-0.32138073],
       [ 0.57655346],
       [ 0.5876173 ],
       [-0.54192346],
       [-0.43655986],
       [ 0.09991902],
       [-0.20411497],
       [ 0.27344877],
       [-0.3795122 ],
       [ 0.50932956],
       [-0.25069582],
       [ 0.03257531],
       [ 0.29152948],
       [ 0.4373746 ]], dtype=float32)

Attention Attention! We only calculate the gradient of the neural network multiplied by w here!

In [129]:
def getGradient_ResNet(ResNet):
    δ = getGradientLayer(ResNet.ResNet[-1].get_weights()[0], ResNet.ResNet[-1].get_weights()[1], ResNet.tmp_layer_output[-1], Resnet.wb.get_weights()[0])
    δ = Resnet.wb.get_weights() + ResNet.ResNetStepsize * δ
 
    for k in range(Resnet.ResNetLayers-1, -1, -1):
        δ = δ + ResNet.ResNetStepsize * getGradientLayer(ResNet.ResNet[k].get_weights()[0], ResNet.ResNet[k].get_weights()[1], ResNet.ResNet.tmp_layer_output[k], δ)
            
    return δ

In [130]:
getGradient_ResNet(Resnet)

ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 1 is different from 16)

In [None]:
def getHessian_ResNet(N):
    δ,ϑ = getGradientHessianLayer(N.out.get_weights()[0], N.out.get_weights()[1], N.tmp_layer_output[-1], np.identity(N.output_dim))

    for k in range(N.num_hidden_layers-1, -1, -1):
        δ,ϑ = getGradientHessianLayer_hidden(N.hidden[k].get_weights()[0], N.hidden[k].get_weights()[1], N.tmp_layer_output[k], δ,  ϑ)
            
    return δ,ϑ