# First and second derivative of FNN with respect to input

Import necessary packages.

In [2]:
import tensorflow as tf
import numpy as np

Define activation function and its derivatives.

In [3]:
def mσ(x):
    return np.abs(x) + np.log(1. + np.exp(-2. * np.abs(x)))
        
        
def mdσ(x):
    return np.tanh(x)
    
    
def md2σ(x):
    return np.divide(1., np.square(np.cosh(x)))

In [4]:
x = [[10], [20], [30]]

print(mσ(x))
print(mdσ(x))
print(md2σ(x))

[[10.]
 [20.]
 [30.]]
[[1.]
 [1.]
 [1.]]
[[8.24461446e-09]
 [1.69934170e-17]
 [3.50260431e-26]]


Does not exactly match the results/values in Julia.

In [191]:
# Define model architecture
class PINN(tf.keras.Model):
    """ Set basic architecture of the PINN model."""

    def __init__(self,
                 output_dim=1,
                 num_hidden_layers=4,
                 num_neurons_per_layer=20,
                 activation= mσ,
                 kernel_initializer='glorot_normal',
                 **kwargs):
        
        super().__init__(**kwargs)

        self.num_hidden_layers = num_hidden_layers
        self.input_dim = 2
        self.output_dim = output_dim

        # Define NN architecture
        
        # Inititialize num_hidden_layers many fully connected dense layers
        self.hidden = [tf.keras.layers.Dense(num_neurons_per_layer,
                                             activation=tf.keras.activations.get(
                                                 activation),
                                             kernel_initializer=kernel_initializer) for _ in range(self.num_hidden_layers)]
        
        # Output layer
        #self.out = tf.keras.layers.Dense(output_dim, activation=None)
        self.out = tf.keras.layers.Dense(output_dim, activation= 'relu')
        
    def call(self, X):
        """Forward-pass through neural network."""
        self.tmp_layer_output = []
        #Z = self.scale(X)
        Z = X
        self.tmp_layer_output.append(Z)
        
        for i in range(self.num_hidden_layers):
            Z = self.hidden[i](Z)
            self.tmp_layer_output.append(Z)
            
        return self.out(Z)

In [192]:
def getGradientLayer(W,b,a,δ):
    z = np.transpose(W).dot(np.transpose(a))  
    # kann schöner gemacht werden. Eigentlich reicht einmal transponieren.
    b = np.reshape(b, np.shape(z))
    z = z + b
    return W.dot(np.diag(mdσ(z) * δ))

In [194]:
def getGradient(N):
    δ = getGradientLayer(N.out.get_weights()[0], N.out.get_weights()[1], N.tmp_layer_output[-1], np.identity(N.output_dim))

    for k in range(N.num_hidden_layers-1, -1, -1):
        δ = getGradientLayer(N.hidden[k].get_weights()[0], N.hidden[k].get_weights()[1], N.tmp_layer_output[k], δ)
            
    return δ

Hessian and gradient computation

In [263]:
def getGradientHessianLayer(W,b,a,δ):
    z = np.transpose(W).dot(np.transpose(a))  
    # kann schöner gemacht werden. Eigentlich reicht einmal transponieren.
    b = np.reshape(b, np.shape(z))
    z = z + b
    ϑ = np.reshape(np.diag(md2σ(z)), (W.shape[1], W.shape[1]))
    return W.dot(np.diag(mdσ(z) * δ)), W @ ϑ @ np.transpose(W)

In [264]:
def getGradientHessianLayer_hidden(W,b,a,δ,ϑ):
    z = np.transpose(W).dot(np.transpose(a))  
    # kann schöner gemacht werden. Eigentlich reicht einmal transponieren.
    b = np.reshape(b, np.shape(z))
    z = z + b
    
    t2  = δ * md2σ(z)
    H1 = W @ np.reshape(np.diag(t2), (W.shape[1], W.shape[1])) @ np.transpose(W)

    dσt = mdσ(z)
    t3 = np.diag(dσt) @ ϑ @ np.diag(dσt)
    H2 = W @ np.reshape(t3, (W.shape[1], W.shape[1])) @ np.transpose(W)
    return W.dot(np.diag(mdσ(z) * δ)), H1 + H2

In [265]:
def getHessian(N):
    δ,ϑ = getGradientHessianLayer(N.out.get_weights()[0], N.out.get_weights()[1], N.tmp_layer_output[-1], np.identity(N.output_dim))

    for k in range(N.num_hidden_layers-1, -1, -1):
        δ,ϑ = getGradientHessianLayer(N.hidden[k].get_weights()[0], N.hidden[k].get_weights()[1], N.tmp_layer_output[k], δ,  ϑ)
            
    return δ,ϑ

In [266]:
δ,ϑ = getGradientHessianLayer(N.out.get_weights()[0], N.out.get_weights()[1], N.tmp_layer_output[-1], np.identity(N.output_dim))
print(ϑ.shape)
print(N.out.get_weights()[0].shape[1])


(20, 20)
1


In [196]:
N = PINN()
# x = tf.constant([[150.], [150.]])
x = tf.constant([[150.]])
out = N(x)
print(out)
#print(N.tmp_layer_output[2])
#print(N.hidden[0].get_weights()[0] * x + N.hidden[0].get_weights()[1])
#print(mσ(N.hidden[0].get_weights()[0]))
#print(np.shape(N.out.get_weights()[0]))
#print(np.shape(N.hidden[-1].get_weights()[0]))
#print(N.hidden[1].get_weights()[0])
getGradient(N)
#print(len(N.hidden))
#print(N.num_hidden_layers)

tf.Tensor([[0.]], shape=(1, 1), dtype=float32)


array([0.12862899])

In [197]:
def _fvals1(N, x):

    with tf.GradientTape(watch_accessed_variables=False) as tape:
        tape.watch(x)
        funcvalue = N(x)

    return tape.gradient(funcvalue, x)

In [198]:
print(_fvals1(N, x))

None


# Explicit derivatives of ResNet

In [12]:
class PINN_ResNet(tf.keras.Model):
    """ Set basic architecture of the PINN model."""

    def __init__(self,
                 ResNetLayers=1,
                 ResNetNeurons=16,
                 ResNetStepsize=1.0,
                 ResNetActivation='softplus',
                 **kwargs):
        
        super(PINN_ResNet, self).__init__(**kwargs)
        
        #RNact = tf.keras.activations.get(ResNetActivation)
        #RNact = my_act
        RNact = mσ
        

        
        self.ResNetLayers = ResNetLayers
        self.ResNetStepsize = ResNetStepsize

        self.ResNet = [tf.keras.layers.Dense(ResNetNeurons,
                                        activation=RNact) for _ in range(self.ResNetLayers)]
        self.wb = tf.keras.layers.Dense(1)
        self.A = tf.keras.layers.Dense(2, use_bias=False)
        self.c = tf.keras.layers.Dense(1, use_bias=False)
        
        #self.num_hidden_layers = num_hidden_layers
        self.input_dim = 2
        self.output_dim = 1


        # Define NN architecture
        
        # Output layer
        #self.out = tf.keras.layers.Dense(1, activation='sigmoid')

        
    def call(self, input_tensor, training=False):
        """Forward-pass through neural network."""
        N = self.ResNet[0](input_tensor, training=training)
        for i in range(1, self.ResNetLayers):
            N = N + self.ResNetStepsize * self.ResNet[i](N, training=training)
        Phi = self.wb(N, training=training)
        #print(input_tensor)
        As = self.A(input_tensor, training=training)
        #print(As)
        sAs = tf.keras.layers.Dot(axes=(1))([input_tensor, As])
        #print(sAs)
        Phi += .5 * sAs
        #print(Phi.shape)
        Phi += self.c(input_tensor, training=training)
            
        return Phi
        #return self.out(Phi)

In [18]:
Resnet = PINN_ResNet()

x = tf.stack([1., 1.], axis=1)
out = Resnet(x)
print(out)

ValueError: axis = 1 not in [-1, 1)