# First and second derivative of FNN with respect to input

Import necessary packages.

In [3]:
import tensorflow as tf
import numpy as np

Define activation function and its derivatives.

In [4]:
# Custom activation function
from keras.layers import Activation
from keras import backend as K
from keras.utils.generic_utils import get_custom_objects

#def mσ(x):
    #return np.abs(x) + np.log(1. + np.exp(-2. * np.abs(x)))
    
def mσ(x):
    """Logistic sigmoid activation, evaluated element-wise.

    Mathematically this is ``1 / (1 + exp(-x))``.  The expression is
    evaluated through ``exp(-|x|)`` so that no positive number is ever
    exponentiated; strongly negative inputs therefore no longer trigger a
    floating-point overflow warning inside ``np.exp``.

    Parameters
    ----------
    x : array_like
        Pre-activation values (scalar, nested sequence, ``range`` or ndarray).

    Returns
    -------
    numpy.ndarray or NumPy scalar
        Sigmoid of ``x``; same shape as ``x``.
    """
    x = np.asarray(x)
    # exp(-|x|) lies in [0, 1], so it cannot overflow for any finite input.
    e = np.exp(np.negative(np.abs(x)))
    # x >= 0:  1 / (1 + e^{-x})        (identical to the textbook formula)
    # x <  0:  e^{x} / (1 + e^{x})     (same value, overflow-free form)
    s = np.where(x >= 0, 1 / (1 + e), e / (1 + e))
    # Indexing with () turns a 0-d result back into a NumPy scalar and leaves
    # proper arrays untouched.
    return s[()]

# Make the sigmoid available to Keras under the string name 'custom_activation'.
# NOTE(review): mσ is written with NumPy functions; confirm that it can be
# applied to Keras tensors before passing 'custom_activation' to a layer.
get_custom_objects()['custom_activation'] = Activation(mσ)


In [5]:
# Quick sanity check: evaluate the sigmoid on the integers 0, 1, 2, 3, 4.
x = range(5)
mσ(x)

array([0.5       , 0.73105858, 0.88079708, 0.95257413, 0.98201379])

In [6]:
#def mdσ(x):
    #return np.tanh(x)
    
    
#def md2σ(x):
    #return np.divide(1., np.square(np.cosh(x)))

def mdσ(x):
    """First derivative of the sigmoid ``mσ``: σ'(x) = σ(x) · (1 − σ(x))."""
    s = mσ(x)
    return s * (1 - s)
    
    
def md2σ(x):
    """Second derivative of the sigmoid ``mσ``: σ''(x) = σ(x) · (1 − σ(x)) · (1 − 2 σ(x))."""
    s = mσ(x)
    return s * (1 - s) * (1 - 2*s)

In [7]:
# Probe the activation and its first two derivatives at large inputs, where
# the sigmoid saturates and both derivatives become tiny.
x = [[10], [20], [30]]

print(mσ(x), mdσ(x), md2σ(x), sep='\n')

[[0.9999546]
 [1.       ]
 [1.       ]]
[[4.53958077e-05]
 [2.06115369e-09]
 [9.34807787e-14]]
[[-4.53916860e-05]
 [-2.06115368e-09]
 [-9.34807787e-14]]


Note: these values do not exactly match the results obtained with the Julia implementation.

Define Neural Network.

In [8]:
# Define model architecture
class PINN(tf.keras.Model):
    """Fully connected feed-forward network that caches its layer inputs.

    The network is a stack of ``num_hidden_layers`` dense layers followed by a
    dense output layer; every layer, including the output layer, uses
    ``activationfunction``.  Each forward pass stores the input of every dense
    layer in ``self.tmp_layer_output`` so that the hand-written derivative
    routines in this notebook can reuse them.
    """

    def __init__(self,
                 output_dim=1,
                 num_hidden_layers=3,
                 num_neurons_per_layer=20,
                 activationfunction = 'sigmoid',
                 kernel_initializer='glorot_normal',
                 **kwargs):
        """Create the layers.

        Args:
            output_dim: number of units of the output layer.
            num_hidden_layers: number of hidden dense layers.
            num_neurons_per_layer: width of every hidden layer.
            activationfunction: activation passed to all dense layers.
            kernel_initializer: kernel initializer of the hidden layers.
            **kwargs: forwarded to ``tf.keras.Model``.
        """
        super().__init__(**kwargs)

        self.num_hidden_layers = num_hidden_layers
        # The input dimension is fixed to two.
        self.input_dim = 2
        self.output_dim = output_dim

        # Hidden stack: identical fully connected layers.
        hidden_layers = []
        for _ in range(self.num_hidden_layers):
            hidden_layers.append(
                tf.keras.layers.Dense(num_neurons_per_layer,
                                      activation=activationfunction,
                                      kernel_initializer=kernel_initializer))
        self.hidden = hidden_layers

        # Output layer; it is activated as well (not a linear read-out).
        self.out = tf.keras.layers.Dense(output_dim, activation=activationfunction)

    def call(self, X):
        """Forward pass; also records the input of every dense layer.

        After the call ``self.tmp_layer_output[k]`` holds the input of
        ``self.hidden[k]`` and ``self.tmp_layer_output[-1]`` holds the input
        of the output layer.
        """
        self.tmp_layer_output = [X]

        activation = X
        for k in range(self.num_hidden_layers):
            activation = self.hidden[k](activation)
            self.tmp_layer_output.append(activation)

        return self.out(activation)

Compute gradient.

Compute gradient for layer l.

In [9]:
def getGradientLayer(W,b,a,δ):
    """Propagate a gradient backwards through one dense sigmoid layer.

    With the pre-activation ``z = (a W)ᵀ + b`` this returns
    ``W · diag(σ'(z)) · δ``.

    Parameters
    ----------
    W : kernel of the layer.
    b : bias of the layer; it is reshaped to the shape of ``(a W)ᵀ``.
    a : input of the layer (presumably one sample stored as a row vector;
        the bias reshape only succeeds if ``(a W)ᵀ`` has as many entries
        as ``b``).
    δ : gradient with respect to the layer output.

    Returns
    -------
    Gradient with respect to the layer input.
    """
    pre_activation = np.transpose(a @ W)
    pre_activation = pre_activation + np.reshape(b, np.shape(pre_activation))

    # Diagonal matrix holding the activation slope of every unit.
    slope = np.diag(mdσ(pre_activation).flatten('F'))

    return W @ (slope @ δ)

Compute gradient of neural network.

In [10]:
def getGradient(N):
    """Gradient of the network output with respect to the network input.

    Uses the layer inputs cached in ``N.tmp_layer_output`` by the most recent
    forward pass, so ``N`` must have been called on the point of interest
    beforehand.  The layers are traversed from the output layer back to the
    first hidden layer.
    """
    out_weights = N.out.get_weights()
    δ = getGradientLayer(out_weights[0], out_weights[1],
                         N.tmp_layer_output[-1], np.identity(N.output_dim))

    for k in reversed(range(N.num_hidden_layers)):
        layer_weights = N.hidden[k].get_weights()
        δ = getGradientLayer(layer_weights[0], layer_weights[1],
                             N.tmp_layer_output[k], δ)

    return δ

Compute gradient and Hessian of last layer.

In [11]:
def getGradientHessianLayer(W,b,a,δ):
    """Gradient and Hessian contribution of the last (output) layer.

    With ``z = (a W)ᵀ + b`` the function returns the pair

    * ``W · diag(σ'(z)) · δ``  – gradient with respect to the layer input,
    * ``W · diag(σ''(z)) · Wᵀ`` – Hessian with respect to the layer input.

    NOTE(review): the Hessian term does not involve ``δ``; ``getHessian``
    passes ``np.identity(N.output_dim)`` for it.

    Parameters
    ----------
    W : kernel of the layer.
    b : bias of the layer; reshaped to the shape of ``(a W)ᵀ``.
    a : input of the layer (presumably a single sample as a row vector).
    δ : gradient with respect to the layer output.
    """
    pre_activation = np.transpose(a @ W)
    pre_activation = pre_activation + np.reshape(b, np.shape(pre_activation))

    slope = np.diag(mdσ(pre_activation).flatten('F'))
    curvature = np.diag(md2σ(pre_activation).flatten('F'))

    gradient = W @ (slope @ δ)
    hessian = W @ curvature @ np.transpose(W)
    return gradient, hessian

Compute gradient and Hessian of hidden layer.

In [12]:
def getGradientHessianLayer_hidden(W,b,a,δ,ϑ):
    """Propagate gradient and Hessian backwards through one hidden sigmoid layer.

    With ``z = (a W)ᵀ + b`` and ``D = diag(σ'(z))`` the function returns

    * ``W · D · δ``                                   – the new gradient,
    * ``W · diag(δ ⊙ σ''(z)) · Wᵀ + W · D · ϑ · D · Wᵀ`` – the new Hessian.

    Parameters
    ----------
    W : kernel of the layer.
    b : bias of the layer; reshaped to the shape of ``(a W)ᵀ``.
    a : input of the layer (presumably a single sample as a row vector).
    δ : gradient with respect to the layer output.
    ϑ : Hessian with respect to the layer output.
    """
    pre_activation = np.transpose(a @ W)
    pre_activation = pre_activation + np.reshape(b, np.shape(pre_activation))

    slope = np.diag(mdσ(pre_activation).flatten('F'))
    W_t = np.transpose(W)

    gradient = W @ (slope @ δ)

    # Contribution of the activation's own curvature, weighted by the incoming gradient.
    weighted_curvature = δ * md2σ(pre_activation)
    curvature_term = W @ np.diag(weighted_curvature.flatten('F')) @ W_t

    # Incoming Hessian, chained through the layer's Jacobian.
    chained_term = W @ (slope @ ϑ @ slope) @ W_t

    return gradient, curvature_term + chained_term

Compute Hessian and gradient of neural network.

In [13]:
def getHessian(N):
    """Gradient and Hessian of the network output with respect to the input.

    Uses the layer inputs cached in ``N.tmp_layer_output`` by the most recent
    forward pass.  The output layer is handled by ``getGradientHessianLayer``;
    the hidden layers are then traversed from last to first with
    ``getGradientHessianLayer_hidden``.

    Returns
    -------
    (δ, ϑ) : gradient and Hessian.
    """
    out_weights = N.out.get_weights()
    δ, ϑ = getGradientHessianLayer(out_weights[0], out_weights[1],
                                   N.tmp_layer_output[-1], np.identity(N.output_dim))

    for k in reversed(range(N.num_hidden_layers)):
        layer_weights = N.hidden[k].get_weights()
        δ, ϑ = getGradientHessianLayer_hidden(layer_weights[0], layer_weights[1],
                                              N.tmp_layer_output[k], δ, ϑ)

    return δ, ϑ

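Why do we get a 2D vector when we insert a 2D vector? The gradient of the scalar network output with respect to a two-component input has one partial derivative per input component.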

In [14]:
# Build a network with the default settings and evaluate it at a single 2-D point.
# NOTE: no random seed is set in the cells shown here, so the weights (and
# therefore all numbers printed below) presumably change from run to run.
NeuralN = PINN()
x = tf.constant([[0.15, 0.1]])
out = NeuralN(x)
print(out)
# Hand-written derivatives; both routines rely on the layer inputs cached by
# the forward pass above.
δ1 = getGradient(NeuralN)
δ2,ϑ = getHessian(NeuralN)
# Both routines compute the gradient, so the difference is expected to be zero.
print(δ1- δ2)
print(δ1)
print(ϑ)

tf.Tensor([[0.65655744]], shape=(1, 1), dtype=float32)
[[0.]
 [0.]]
[[ 0.00203109]
 [-0.00050551]]
[[-4.95978355e-06 -1.97882320e-05]
 [-1.97882323e-05  1.31986261e-05]]


-> We need to choose appropriate dtypes so that no operation overflows.

In [15]:
def _fvals1(N, x):
    """First derivative of the model output with respect to ``x`` via ``tf.GradientTape``.

    Serves as the automatic-differentiation reference for the hand-written
    gradient routines.

    Args:
        N: callable model.
        x: input tensor; it is watched explicitly, so a constant tensor works.

    Returns:
        ``d N(x) / d x`` as returned by ``GradientTape.gradient``.
    """
    with tf.GradientTape() as tape:
        tape.watch(x)
        output = N(x)

    return tape.gradient(output, x)

In [16]:
#X = tf.random.normal((10,2))
#out = NeuralN(X)
#print(out)
#print(X)
#print(getGradient(N))
#print(_fvals1(NeuralN, X))
# Reference gradient from automatic differentiation, for comparison with getGradient.
print(_fvals1(NeuralN, x))

tf.Tensor([[ 0.00203109 -0.00050551]], shape=(1, 2), dtype=float32)


In [17]:
def _fvals2(N, x):
    """Second derivative of the model output with respect to ``x`` via nested tapes.

    The inner tape yields the gradient of ``N(x)``; the outer tape then
    differentiates that gradient once more.

    Args:
        N: callable model.
        x: input tensor; both tapes watch it explicitly.

    Returns:
        Jacobian of the gradient with respect to ``x``.  For the ``(1, 2)``
        input used in this notebook the result has shape ``(1, 2, 1, 2)``.
    """
    with tf.GradientTape(persistent=True) as outer_tape:
        outer_tape.watch(x)
        with tf.GradientTape() as inner_tape:
            inner_tape.watch(x)
            output = N(x)

        # Taken inside the outer context so that the outer tape records it.
        first_derivative = inner_tape.gradient(output, x)

    return outer_tape.jacobian(first_derivative, x)

In [18]:
# Reference Hessian from automatic differentiation, for comparison with getHessian.
print(_fvals2(NeuralN, x))

tf.Tensor(
[[[[-4.9597638e-06 -1.9788240e-05]]

  [[-1.9788238e-05  1.3198628e-05]]]], shape=(1, 2, 1, 2), dtype=float32)


Maybe gradient tape thinks that the neural network is not differentiable?

# Explicit derivatives of ResNet

In [37]:
class PINN_ResNet(tf.keras.Model):
    """Residual network with a dense input layer, residual blocks and a linear read-out.

    Forward pass (``h`` is ``ResNetStepsize``)::

        u_1     = act(ResNet[0](x))
        u_{k+1} = u_k + h * ResNet[k](u_k)      for k = 1 .. ResNetLayers - 1
        Phi     = wb(u_last)

    Each forward pass stores the input of every ``ResNet`` layer in
    ``self.tmp_layer_output`` for the hand-written derivative routines.
    """

    def __init__(self,
                 ResNetLayers=3,
                 ResNetNeurons=16,
                 ResNetStepsize=1.0,
                 ResNetActivation='sigmoid',
                 **kwargs):
        """Create the layers.

        Args:
            ResNetLayers: number of dense layers (input layer plus residual blocks).
            ResNetNeurons: width of every dense layer in the stack.
            ResNetStepsize: factor applied to every residual update.
            ResNetActivation: activation of the dense layers in the stack.
            **kwargs: forwarded to ``tf.keras.Model``.
        """
        super().__init__(**kwargs)

        self.ResNetLayers = ResNetLayers
        self.ResNetStepsize = ResNetStepsize

        dense_stack = []
        for _ in range(self.ResNetLayers):
            dense_stack.append(tf.keras.layers.Dense(ResNetNeurons,
                                                     activation=ResNetActivation))
        self.ResNet = dense_stack

        # Linear read-out of the final state.
        self.wb = tf.keras.layers.Dense(1)
        # Bias-free layers for a quadratic and a linear term in the input.
        # They are created here, but call() does not use them at the moment.
        self.A = tf.keras.layers.Dense(2, use_bias=False)
        self.c = tf.keras.layers.Dense(1, use_bias=False)

        self.input_dim = 2
        self.output_dim = 1

    def call(self, input_tensor, training=False):
        """Forward pass; also records the input of every ``ResNet`` layer.

        After the call ``self.tmp_layer_output[k]`` holds the input of
        ``self.ResNet[k]``.
        """
        self.tmp_layer_output = [input_tensor]

        state = self.ResNet[0](input_tensor, training=training)
        for k in range(1, self.ResNetLayers):
            self.tmp_layer_output.append(state)
            residual = self.ResNet[k](state, training=training)
            state = state + self.ResNetStepsize * residual

        # Disabled extension: adding 0.5 * <x, A(x)> and c(x) to the read-out.
        return self.wb(state, training=training)

In [38]:
# Build a residual network with default settings; `x` is the evaluation point
# defined in an earlier cell.
Resnet = PINN_ResNet()

# The forward pass builds the weights and fills Resnet.tmp_layer_output.
out = Resnet(x)
# Reference gradient from automatic differentiation.
_fvals1(Resnet, x)
#print(out)
#print(Resnet.tmp_layer_output.append)
#print(Resnet.wb.get_weights()[0].shape)
#print(Resnet.ResNet[-1].get_weights()[0].shape)

<tf.Tensor: shape=(1, 2), dtype=float32, numpy=array([[-0.08468716, -0.01424088]], dtype=float32)>

In [21]:
# Shape of the evaluation point: one sample with two components.
x.shape

TensorShape([1, 2])

Gradient of the model that approximates the solution of the PDE.

In [22]:
# Scratch check of a descending range: counts 5, 4, ..., 1 (the stop value 0 is excluded).
for i in range(5, 0, -1):
    print(i)

5
4
3
2
1


In [45]:
def getGradient_ResNet(R):
    """Gradient of the ResNet output with respect to the network input.

    Uses the layer inputs cached in ``R.tmp_layer_output`` by the most recent
    forward pass, so ``R`` must have been called on the point of interest
    beforehand.

    The read-out ``Phi = wb(u_last)`` is linear, so the backward pass starts
    from the read-out kernel.  Every residual block
    ``u_{k+1} = u_k + h * act(W_kᵀ u_k + b_k)`` maps the gradient ``δ`` to
    ``δ + h * getGradientLayer(W_k, b_k, u_k, δ)``; the first layer is a plain
    dense layer without a skip connection.

    All residual blocks are handled by one loop.  Previously the last block
    was special-cased before the loop, which treated the input layer as a
    residual block when ``R.ResNetLayers == 1`` and failed with a shape
    mismatch.  Results for two or more layers are unchanged.

    Parameters
    ----------
    R : model exposing ``ResNet``, ``wb``, ``ResNetLayers``,
        ``ResNetStepsize`` and ``tmp_layer_output`` (e.g. ``PINN_ResNet``).

    Returns
    -------
    Gradient with respect to the input (a column with one row per input
    component).  The quadratic and linear input terms that are commented out
    in ``PINN_ResNet.call`` are not included.
    """
    # Gradient of the linear read-out with respect to the final state.
    δ = R.wb.get_weights()[0]

    # Residual blocks, from the last one back to block 1.
    for k in range(R.ResNetLayers - 1, 0, -1):
        weights = R.ResNet[k].get_weights()
        δ = δ + R.ResNetStepsize * getGradientLayer(weights[0], weights[1],
                                                    R.tmp_layer_output[k], δ)

    # Input layer: no skip connection.
    weights = R.ResNet[0].get_weights()
    return getGradientLayer(weights[0], weights[1], R.tmp_layer_output[0], δ)

In [46]:
# Number of dense layers in the residual stack.
Resnet.ResNetLayers

3

In [47]:
# The forward pass caches one layer input per dense layer of the stack.
len(Resnet.tmp_layer_output)

3

In [48]:
# Hand-written gradient; expected to agree with the GradientTape result of _fvals1(Resnet, x).
getGradient_ResNet(Resnet)

array([[-0.08468717],
       [-0.01424088]], dtype=float32)

In [42]:
# Kernel shape of the first dense layer.
Resnet.ResNet[0].get_weights()[0].shape

(2, 16)

In [43]:
# Reference gradient from automatic differentiation.
_fvals1(Resnet, x)

<tf.Tensor: shape=(1, 2), dtype=float32, numpy=array([[-0.08468716, -0.01424088]], dtype=float32)>

In [193]:
# Apply the bias-free dense layer A directly; call() does not use this layer.
Resnet.A(x)

<tf.Tensor: shape=(1, 2), dtype=float32, numpy=array([[ 0.04803059, -0.06120798]], dtype=float32)>

In [217]:
# NOTE(review): ad-hoc ratio of two hard-coded numbers; their origin is not shown in this notebook.
0.7906373 / 0.705509

1.120662245272562

In [218]:
# NOTE(review): ad-hoc ratio of two hard-coded numbers; their origin is not shown in this notebook.
0.75242096 / 0.620812

1.2119948712331592

In [265]:
def getHessian_ResNet(N):
    """Gradient and Hessian of the ResNet output with respect to the network input.

    The previous body was a copy of ``getHessian`` and accessed ``N.out``,
    ``N.hidden`` and ``N.num_hidden_layers``, which a ``PINN_ResNet`` does not
    define.  This version implements the backward recursion for the residual
    architecture.

    Uses the layer inputs cached in ``N.tmp_layer_output`` by the most recent
    forward pass, so ``N`` must have been called on the point of interest
    beforehand.

    Notation: ``h`` is ``N.ResNetStepsize``, ``z = W_kᵀ u_k + b_k`` is the
    pre-activation of layer ``k`` and ``D = diag(σ'(z))``.

    * Read-out ``Phi = wb(u_last)`` is linear: ``δ`` is the read-out kernel
      and ``ϑ = 0``.
    * Residual block ``u_{k+1} = u_k + h σ(z)`` (``k ≥ 1``)::

          δ ← δ + h W D δ
          ϑ ← ϑ + h (W D ϑ + ϑ D Wᵀ) + h² W D ϑ D Wᵀ + h W diag(δ ⊙ σ''(z)) Wᵀ

    * Input layer ``u_1 = σ(z)`` (``k = 0``, no skip connection)::

          δ ← W D δ
          ϑ ← W D ϑ D Wᵀ + W diag(δ ⊙ σ''(z)) Wᵀ

    Parameters
    ----------
    N : model exposing ``ResNet``, ``wb``, ``ResNetLayers``,
        ``ResNetStepsize`` and ``tmp_layer_output`` (e.g. ``PINN_ResNet``).
        The dense layers are assumed to use the sigmoid activation whose
        derivatives are ``mdσ`` and ``md2σ``.

    Returns
    -------
    (δ, ϑ) : gradient (column vector) and Hessian (square matrix) with
        respect to the input.
    """
    step = N.ResNetStepsize

    # Linear read-out: gradient is the kernel, Hessian vanishes.
    δ = N.wb.get_weights()[0]
    ϑ = np.zeros((np.shape(δ)[0],) * 2, dtype=δ.dtype)

    for k in range(N.ResNetLayers - 1, -1, -1):
        weights = N.ResNet[k].get_weights()
        W, b = weights[0], weights[1]

        z = np.transpose(N.tmp_layer_output[k] @ W)
        z = z + np.reshape(b, np.shape(z))

        # Scaling the columns of W is the same as multiplying by a diagonal matrix.
        WD = W * np.transpose(mdσ(z))                                  # W · diag(σ'(z))
        curvature = (W * np.transpose(δ * md2σ(z))) @ np.transpose(W)  # W · diag(δ ⊙ σ''(z)) · Wᵀ
        chained = WD @ ϑ @ np.transpose(WD)                            # W D ϑ D Wᵀ

        if k > 0:
            # Residual block: the Jacobian is I + h · D · Wᵀ.  ϑ is symmetric,
            # so ϑ D Wᵀ is the transpose of W D ϑ.
            cross = WD @ ϑ
            ϑ = ϑ + step * (cross + np.transpose(cross) + curvature) + step ** 2 * chained
            δ = δ + step * (WD @ δ)
        else:
            # Input layer: plain dense layer, Jacobian D · Wᵀ.
            ϑ = curvature + chained
            δ = WD @ δ

    return δ, ϑ