# First and second derivative of FNN with respect to input

Import necessary packages.

In [52]:
import tensorflow as tf
import numpy as np

Define activation function and its derivatives.

In [53]:
# Custom activation function
from keras.layers import Activation
from keras import backend as K
from keras.utils.generic_utils import get_custom_objects

#def mσ(x):
    #return np.abs(x) + np.log(1. + np.exp(-2. * np.abs(x)))
    
def mσ(x):
    return K.sigmoid(x)

get_custom_objects().update({'custom_activation': Activation(mσ)})


In [54]:
#def mdσ(x):
    #return np.tanh(x)
    
    
#def md2σ(x):
    #return np.divide(1., np.square(np.cosh(x)))

def mdσ(x):
    return mσ(x) * (1 - mσ(x))
    
    
def md2σ(x):
    return mσ(x) * (1 - mσ(x)) * (1 - 2*mσ(x))

In [55]:
x = [[10], [20], [30]]

print(mσ(x))
print(mdσ(x))
print(md2σ(x))

[[10.]
 [20.]
 [30.]]
[[1.]
 [1.]
 [1.]]
[[8.24461446e-09]
 [1.69934170e-17]
 [3.50260431e-26]]


Does not exactly match the results/values in Julia.

Define Neural Network.

In [106]:
# Define model architecture
class PINN(tf.keras.Model):
    """ Set basic architecture of the PINN model."""

    def __init__(self,
                 output_dim=1,
                 num_hidden_layers=3,
                 num_neurons_per_layer=20,
                 activationfunction = 'sigmoid',
                 kernel_initializer='glorot_normal',
                 **kwargs):
        
        super().__init__(**kwargs)

        self.num_hidden_layers = num_hidden_layers
        self.input_dim = 2
        self.output_dim = output_dim

        # Define NN architecture
        
        # Inititialize num_hidden_layers many fully connected dense layers
        self.hidden = [tf.keras.layers.Dense(num_neurons_per_layer,
                                             activation = activationfunction,
                                             kernel_initializer=kernel_initializer) for _ in range(self.num_hidden_layers)]
        
        # Output layer
        #self.out = tf.keras.layers.Dense(output_dim, activation=None)
        self.out = tf.keras.layers.Dense(output_dim, activation = activationfunction)
        
    def call(self, X):
        """Forward-pass through neural network."""
        self.tmp_layer_output = []
        #Z = self.scale(X)
        Z = X
        self.tmp_layer_output.append(Z)
        
        for i in range(self.num_hidden_layers):
            Z = self.hidden[i](Z)
            self.tmp_layer_output.append(Z)
            
        return self.out(Z)

Compute gradient.

Compute gradient for layer l.

In [107]:
def getGradientLayer(W,b,a,δ):
    z1 = np.transpose(a @ W)  
    b = np.reshape(b, np.shape(z1))
    z2 = z1 + b
    z3 = np.diag(mdσ(z2).flatten('F')) @ δ
    
    return W @ z3

Compute gradient of neural network.

In [108]:
def getGradient(N):
    δ = getGradientLayer(N.out.get_weights()[0], N.out.get_weights()[1], N.tmp_layer_output[-1], np.identity(N.output_dim))

    for k in range(N.num_hidden_layers-1, -1, -1):
        δ = getGradientLayer(N.hidden[k].get_weights()[0], N.hidden[k].get_weights()[1], N.tmp_layer_output[k], δ)
            
    return δ

Compute gradient and Hessian of last layer.

In [109]:
def getGradientHessianLayer(W,b,a,δ):
    z1 = np.transpose(a @ W)  
    b = np.reshape(b, np.shape(z1))
    z2 = z1 + b
    z3 = np.diag(mdσ(z2).flatten('F')) @ δ
    
    ϑ = np.diag(md2σ(z2).flatten('F'))
    
    return W @ z3, W @ ϑ @ np.transpose(W)

Compute gradient and Hessian of hidden layer.

In [110]:
def getGradientHessianLayer_hidden(W,b,a,δ,ϑ):
    z1 = np.transpose(a @ W)  
    b = np.reshape(b, np.shape(z1))
    z2 = z1 + b
    z3 = np.diag(mdσ(z2).flatten('F')) @ δ
    
    t2 = δ * md2σ(z2)
    H1 = W @ np.diag(t2.flatten('F')) @ np.transpose(W)

    dσt = np.diag(mdσ(z2).flatten('F'))
    t3 = dσt @ ϑ @ dσt
    H2 = W @ t3 @ np.transpose(W)
    
    return W @ z3, H1+H2

Compute Hessian and gradient of neural network.

In [111]:
def getHessian(N):
    δ,ϑ = getGradientHessianLayer(N.out.get_weights()[0], N.out.get_weights()[1], N.tmp_layer_output[-1], np.identity(N.output_dim))

    for k in range(N.num_hidden_layers-1, -1, -1):
        δ,ϑ = getGradientHessianLayer_hidden(N.hidden[k].get_weights()[0], N.hidden[k].get_weights()[1], N.tmp_layer_output[k], δ,  ϑ)
            
    return δ,ϑ

Why do we get a 2D vector when we insert a 2D vector?

In [112]:
NeuralN = PINN()
x = tf.constant([[0.15, 0.1]])
#x = tf.constant([[1.]])
out = NeuralN(x)
print(out)
#print(N.tmp_layer_output[2])
#print(N.hidden[0].get_weights()[0] * x + N.hidden[0].get_weights()[1])
#print(mσ(N.hidden[0].get_weights()[0]))
#print(np.shape(N.out.get_weights()[0]))
#print(np.shape(N.hidden[-1].get_weights()[0]))
#print(N.hidden[1].get_weights()[0])
δ1 = getGradient(NeuralN)
δ2,ϑ = getHessian(NeuralN)
#print(len(N.hidden))
#print(N.num_hidden_layers)
print(δ1- δ2)
print(δ1)
print(ϑ)

tf.Tensor([[0.74503386]], shape=(1, 1), dtype=float32)
[[0.]
 [0.]]
[[0.00705603]
 [0.00928249]]
[[0.02808889 0.03007298]
 [0.03007298 0.04987809]]


-> We need to choose appropriate dtypes so that no operation overflows.

In [113]:
def _fvals1(N, x):

    with tf.GradientTape() as g:
        g.watch(x)
        y = N(x)

    dy_dx = g.gradient(y, x)

    return dy_dx

In [114]:
#X = tf.random.normal((10,2))
#out = NeuralN(X)
#print(out)
#print(X)
#print(getGradient(N))
#print(_fvals1(NeuralN, X))
print(_fvals1(NeuralN, x))

tf.Tensor([[-0.00051097 -0.00069446]], shape=(1, 2), dtype=float32)


In [74]:
def _fvals2(N, x):

    with tf.GradientTape(persistent=True) as h:
        h.watch(x)
        with tf.GradientTape(persistent=True) as g:
            g.watch(x)
            y = N(x)

        dy_dx = g.jacobian(y, x)
    
    d2y_d2x = h.gradient(dy_dx, x)

    return y, dy_dx, d2y_d2x

In [75]:
print(_fvals2(NeuralN, x))

TypeError: Target should be a list or nested structure of Tensors or Variables to be differentiated, but recieved None

Maybe gradient tape thinks that the neural network is not differentiable?

# Explicit derivatives of ResNet

In [167]:
class PINN_ResNet(tf.keras.Model):
    """ Set basic architecture of the PINN model."""

    def __init__(self,
                 ResNetLayers=2,
                 ResNetNeurons=16,
                 ResNetStepsize=1.0,
                 ResNetActivation='softplus',
                 **kwargs):
        
        super(PINN_ResNet, self).__init__(**kwargs)
        
        #RNact = tf.keras.activations.get(ResNetActivation)
        #RNact = my_act
        RNact = mσ
        

        
        self.ResNetLayers = ResNetLayers
        self.ResNetStepsize = ResNetStepsize

        self.ResNet = [tf.keras.layers.Dense(ResNetNeurons,
                                        activation = RNact) for _ in range(self.ResNetLayers)]
        self.wb = tf.keras.layers.Dense(1)
        self.A = tf.keras.layers.Dense(2, use_bias=False)
        self.c = tf.keras.layers.Dense(1, use_bias=False)
        
        #self.num_hidden_layers = num_hidden_layers
        self.input_dim = 2
        self.output_dim = 1


        # Define NN architecture
        
        # Output layer
        #self.out = tf.keras.layers.Dense(1, activation='sigmoid')

        
    def call(self, input_tensor, training=False):
        """Forward-pass through neural network."""
        self.tmp_layer_output = [input_tensor]
        N = self.ResNet[0](input_tensor, training=training)
        self.tmp_layer_output.append(N)
        for i in range(1, self.ResNetLayers):
            N = N + self.ResNetStepsize * self.ResNet[i](N, training=training)
            self.tmp_layer_output.append(N)
        Phi = self.wb(N, training=training)
        #print(input_tensor)
        As = self.A(input_tensor, training=training)
        #print(As)
        sAs = tf.keras.layers.Dot(axes=(1))([input_tensor, As])
        #print(sAs)
        Phi += .5 * sAs
        #print(Phi.shape)
        Phi += self.c(input_tensor, training=training)
            
        return Phi
        #return self.out(Phi)

In [168]:
Resnet = PINN_ResNet()

out = Resnet(x)
#print(out)
#print(Resnet.tmp_layer_output.append)
print(Resnet.wb.get_weights()[0].shape)
print(Resnet.ResNet[-1].get_weights()[0].shape)

(16, 1)
(16, 16)


In [236]:
Resnet.wb.get_weights()

[array([[-0.00221908],
        [ 0.3507337 ],
        [-0.32138073],
        [ 0.57655346],
        [ 0.5876173 ],
        [-0.54192346],
        [-0.43655986],
        [ 0.09991902],
        [-0.20411497],
        [ 0.27344877],
        [-0.3795122 ],
        [ 0.50932956],
        [-0.25069582],
        [ 0.03257531],
        [ 0.29152948],
        [ 0.4373746 ]], dtype=float32),
 array([0.], dtype=float32)]

In [237]:
m = getGradientLayer(Resnet.ResNet[-2].get_weights()[0], Resnet.ResNet[-2].get_weights()[1], Resnet.tmp_layer_output[-2], Resnet.wb.get_weights()[0])
np.shape(m)

ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 2 is different from 16)

In [170]:
Resnet.wb.get_weights()[0]

array([[-0.00221908],
       [ 0.3507337 ],
       [-0.32138073],
       [ 0.57655346],
       [ 0.5876173 ],
       [-0.54192346],
       [-0.43655986],
       [ 0.09991902],
       [-0.20411497],
       [ 0.27344877],
       [-0.3795122 ],
       [ 0.50932956],
       [-0.25069582],
       [ 0.03257531],
       [ 0.29152948],
       [ 0.4373746 ]], dtype=float32)

Attention Attention! We only calculate the gradient of the neural network multiplied by w here!

In [262]:
def getGradient_ResNet(R):
    δ = getGradientLayer(R.ResNet[-1].get_weights()[0], R.ResNet[-1].get_weights()[1], R.tmp_layer_output[-1], R.wb.get_weights()[0])

    δ = R.wb.get_weights()[0] + R.ResNetStepsize * δ
 
    for k in range(R.ResNetLayers-1, 0, -1):
        print(δ.shape)
        δ = δ + R.ResNetStepsize * getGradientLayer(R.ResNet[k].get_weights()[0], R.ResNet[k].get_weights()[1], R.tmp_layer_output[k], δ)
          
    print(δ.shape)
    z1 = np.transpose(R.tmp_layer_output[0] @ R.ResNet[0].get_weights()[0])  
    b = np.reshape(R.ResNet[0].get_weights()[1], np.shape(z1))
    z2 = z1 + b
    z3 = np.diag(mdσ(z2).flatten('F')) @ δ
    print(z3.shape)
    print(R.ResNet[0].get_weights()[1].shape)
    
    
    return R.ResNet[0].get_weights()[0] @ z3 

In [263]:
getGradient_ResNet(Resnet)

(16, 1)
(16, 1)
(16, 1)
(16,)


array([[ 0.02126515],
       [-0.00668164]], dtype=float32)

In [264]:
Resnet.ResNet[0].get_weights()[0]

array([[ 0.4022013 , -0.5119319 , -0.20393524,  0.39795297,  0.2650633 ,
         0.25077307, -0.1648305 , -0.23486501, -0.09690687,  0.01209843,
         0.204719  , -0.31106764,  0.45248616, -0.23396394,  0.31787902,
        -0.01179892],
       [-0.30930048,  0.0735175 ,  0.19421488,  0.30356568,  0.23385906,
         0.37316298,  0.17676139, -0.46002835, -0.09949651, -0.39412093,
         0.23541123, -0.1332941 ,  0.49164307,  0.47637796, -0.25680012,
        -0.04756379]], dtype=float32)

In [265]:
def getHessian_ResNet(N):
    δ,ϑ = getGradientHessianLayer(N.out.get_weights()[0], N.out.get_weights()[1], N.tmp_layer_output[-1], np.identity(N.output_dim))

    for k in range(N.num_hidden_layers-1, -1, -1):
        δ,ϑ = getGradientHessianLayer_hidden(N.hidden[k].get_weights()[0], N.hidden[k].get_weights()[1], N.tmp_layer_output[k], δ,  ϑ)
            
    return δ,ϑ