### Load necessary libraries

In [1]:
import numpy as np
import pandas as pd
import neuralsens.partial_derivatives as ns
from sklearn.model_selection import train_test_split
import torch
torch.manual_seed(1)
%matplotlib qt

In [2]:
# Prefer the GPU when CUDA is available, otherwise fall back to the CPU
device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)
device

device(type='cuda')

### Create synthetic dataset to check behavior of functions

In [3]:
# Seed NumPy's global generator so the synthetic data (and the noise drawn
# later from np.random) is the same on every Restart & Run All.
np.random.seed(1)

samples = 100000   # number of synthetic observations
n_columns = 8      # number of input features X1..X8

# Independent standard-normal inputs, one column per feature
sm = np.random.normal(size=(samples,n_columns))
df = pd.DataFrame(sm, columns=['X' + str(x) for x in range(1,n_columns+1)])

### Check behavior of hessian function

#### Create output Y as a function of the inputs with linear, quadratic and interaction terms

In [4]:
# Gaussian noise scaled to a standard deviation of 0.1
noise = 0.1 * np.random.normal(size=(samples,))
# Linear term in X1, quadratic term in X2, interaction term between X3 and X4
df['Y'] = -0.8 * df['X1'] + 0.5 * df['X2'] ** 2 - df['X3'] * df['X4'] + noise

#### Train an MLP model using the DataFrame created above

In [5]:
## Create random 80/20 % split
# Every column except the target 'Y' is used as an input feature
is_feature = df.columns != 'Y'
X_train, X_test, y_train, y_test = train_test_split(
    df.loc[:, is_feature].to_numpy(),
    df['Y'],
    test_size=0.2,
    random_state=5,
)

In [6]:
# Build float32 tensors directly on the target device.
# Calling .requires_grad_(True) *before* .to(device) produces a non-leaf copy
# when the device is CUDA; creating the tensor on the device with
# requires_grad=True keeps the inputs as leaf tensors on every device.
X_train_tch = torch.tensor(X_train, dtype=torch.float32, device=device, requires_grad=True)
X_test_tch = torch.tensor(X_test, dtype=torch.float32, device=device, requires_grad=True)
# Targets do not need gradients
y_train_tch = torch.tensor(y_train.to_numpy(), dtype=torch.float32, device=device)
y_test_tch = torch.tensor(y_test.to_numpy(), dtype=torch.float32, device=device)

#### Define MLP model torch class

In [7]:
class MLP(torch.nn.Sequential):
    """Fully connected network: Linear + Sigmoid per hidden layer, then a Linear output.

    Args:
        input_size: number of input features.
        output_size: number of output neurons.
        hidden_size: number of neurons of each hidden layer, in order. An empty
            sequence yields a single Linear layer from input to output.
            The default list is only read, never mutated.
    """
    def __init__(self, input_size:int, output_size:int = 1, hidden_size:list = [10]):
        # Store layers to initiate sequential neural network
        layers = []
        # Width of the previous layer; starts at the input and is updated after
        # every hidden layer so each Linear (including the output one) is wired
        # to the layer that actually precedes it.
        in_features = input_size
        for neurons in hidden_size:
            layers += [torch.nn.Linear(in_features, neurons)]
            layers += [torch.nn.Sigmoid()]
            in_features = neurons
        # Output layer takes the width of the LAST hidden layer
        layers += [torch.nn.Linear(in_features, output_size)]
        super(MLP, self).__init__(*layers)

In [8]:
# Two hidden layers of 15 neurons each; build the network and move it to the selected device
model = MLP(input_size=n_columns, output_size=1, hidden_size=[15,15]).to(device)

#### Model training

In [9]:
# Define error loss and optimizer
criterion = torch.nn.MSELoss()
# Learning rate; passed to the optimizer so that changing it here takes effect
lr = 0.1
optimizer = torch.optim.SGD(model.parameters(), lr = lr)

In [10]:
# Check model performance before training
model.eval()
# Evaluation only: X_test_tch requires grad, so without no_grad() an autograd
# graph would be built for a value that is only read through .item().
with torch.no_grad():
    y_pred = model(X_test_tch)
    # The prediction already lives on the model's device; squeeze drops the size-1 output dimension
    before_train = criterion(y_pred.squeeze(), y_test_tch)
print('Test loss before training' , before_train.item()) 

Test loss before training 2.2554070949554443


In [11]:
# Train model
model.train()
target_loss = 0.05     # stop once the training loss drops to this value
max_epochs = 100_000   # safety cap so a non-converging run cannot loop forever
epoch = 0
# Starting value for the loop condition (test loss measured before training)
loss = before_train
while loss.item() > target_loss and epoch < max_epochs:
    optimizer.zero_grad() # Reset the gradient
    epoch += 1
    # Forward pass
    y_pred = model(X_train_tch)
    # Compute Loss
    loss = criterion(y_pred.squeeze().to(device), y_train_tch)
    print('Epoch {}: train loss: {}'.format(epoch, loss.item()))
    # Backward pass
    loss.backward()
    optimizer.step()

# Make a capped (non-converged) run visible instead of silently continuing
if loss.item() > target_loss:
    print('Stopped after {} epochs without reaching target loss {}'.format(epoch, target_loss))

Epoch 1: train loss: 2.30802845954895
Epoch 2: train loss: 2.177272319793701
Epoch 3: train loss: 2.176687002182007
Epoch 4: train loss: 2.1763529777526855
Epoch 5: train loss: 2.1760220527648926
Epoch 6: train loss: 2.1756932735443115
Epoch 7: train loss: 2.1753666400909424
Epoch 8: train loss: 2.175041913986206
Epoch 9: train loss: 2.1747190952301025
Epoch 10: train loss: 2.1743977069854736
Epoch 11: train loss: 2.1740779876708984
Epoch 12: train loss: 2.173759698867798
Epoch 13: train loss: 2.1734426021575928
Epoch 14: train loss: 2.173126697540283
Epoch 15: train loss: 2.17281174659729
Epoch 16: train loss: 2.172497510910034
Epoch 17: train loss: 2.1721842288970947
Epoch 18: train loss: 2.1718714237213135
Epoch 19: train loss: 2.1715588569641113
Epoch 20: train loss: 2.1712467670440674
Epoch 21: train loss: 2.1709346771240234
Epoch 22: train loss: 2.1706230640411377
Epoch 23: train loss: 2.1703109741210938
Epoch 24: train loss: 2.16999888420105
Epoch 25: train loss: 2.1696860790252

In [12]:
# Check model performance after training
model.eval()
# Evaluation only: no autograd graph is needed for a value read through .item()
with torch.no_grad():
    y_pred = model(X_test_tch)
    # Stored under its own name so the pre-training loss in `before_train`
    # (read by the training cell as its starting value) is not overwritten.
    after_train = criterion(y_pred.squeeze(), y_test_tch)
print('Test loss after training' , after_train.item())  

Test loss after training 0.05063025280833244


#### Execute hessian function and check sensitivity metrics

In [13]:
# Obtain parameters to perform hessian
X = pd.DataFrame(X_train, columns=df.columns[df.columns != 'Y'])
# y_train is a pandas Series that still carries the shuffled row labels from the
# split, whereas X is rebuilt from a NumPy array and gets a fresh 0..n-1 index.
# Build y from the raw values so both frames share the same index.
y = pd.DataFrame(y_train.to_numpy(), columns=['Y'])
# Layer/input selection settings for the sensitivity computation
# NOTE(review): these four values are not passed to the hessian_mlp call in
# this notebook section — confirm whether they are used further down.
sens_end_layer = 'last'
sens_end_input = False
sens_origin_layer = 0
sens_origin_input = True

In [14]:
# Snapshot the (name, parameter) pairs once, in the order the model reports them
named_params = list(model.named_parameters())
# Weight matrices: detached from autograd, transposed, and placed on `device`
wts_torch = [p.detach().T.to(device) for n, p in named_params if "weight" in n]
# Bias vectors: detached from autograd and placed on `device`
bias_torch = [p.detach().to(device) for n, p in named_params if "bias" in n]
# Activation names handed to neuralsens
# (presumably one per layer of the input/hidden/hidden/output structure — TODO confirm)
actfunc_torch = ["identity", "logistic", "logistic", "identity"]

In [16]:
# Run the neuralsens Hessian analysis on the extracted weights, biases and
# activation names, using the training inputs X and targets y.
# NOTE(review): the weights/biases were moved to `device` when extracted, but
# dev="cpu" is requested here — confirm this is the intended device.
hessian = ns.hessian_mlp(wts_torch, bias_torch, actfunc_torch, X, y, use_torch=True, dev="cpu")

  ).T.reshape(meanSens.shape[1], -1),


In [17]:
# Check sensitivity metrics
# Y = -0.8*X1 + 0.5*X2**2 - X3*X4 + noise, so the second derivatives should be:
# For (X2, X2), mean should be around 1
# For (X3, X4) and (X4, X3), mean should be around -1
# For every other pair (all pairs involving X1 or X5..X8 included), mean should be around 0
hessian.summary()

Sensitivity analysis of [8, 15, 15, 1] MLP network.

Sensitivity measures of each output:

$Y 

metric      mean                                                              \
input         X1        X2        X3        X4        X5        X6        X7   
X1     -0.011107 -0.001866 -0.001571  0.000979  0.001080 -0.003623  0.000494   
X2     -0.001866  0.942394  0.001276 -0.002804  0.000111 -0.000490 -0.000484   
X3     -0.001571  0.001276 -0.018037 -0.968939  0.000794  0.000458  0.000062   
X4      0.000979 -0.002804 -0.968939 -0.020661 -0.000164 -0.000051 -0.000670   
X5      0.001080  0.000111  0.000794 -0.000164 -0.000848  0.000619 -0.000919   
X6     -0.003623 -0.000490  0.000458 -0.000051  0.000619 -0.000562  0.000147   
X7      0.000494 -0.000484  0.000062 -0.000670 -0.000919  0.000147  0.001838   
X8     -0.002697  0.000015 -0.000549  0.000241 -0.000251  0.000282 -0.000004   

metric                 std            ...                     mean_squared  \
input         X8        X

In [18]:
# Print the per-sample sensitivities (the printed output lists only the first 5 samples)
hessian.info()

Sensitivity analysis of [8, 15, 15, 1] MLP network.

80000 samples

Sensitivities of each output (only 5 first samples):

$Y 

input        X1                                                              \
input        X1        X2        X3        X4        X5        X6        X7   
0     -0.064114 -0.038149 -0.030561 -0.014853 -0.000445 -0.003348 -0.010453   
1      0.019391  0.014610 -0.007596  0.019901  0.006695 -0.007230  0.005444   
2     -0.047013  0.045195  0.000634 -0.017053 -0.005657 -0.001592  0.010465   
3     -0.048508 -0.022523  0.021845 -0.066473 -0.000751 -0.002190 -0.010355   
4     -0.056365  0.010015  0.000110 -0.024922  0.000769 -0.006766 -0.000980   

input                  X2            ...        X7                  X8  \
input        X8        X1        X2  ...        X7        X8        X1   
0     -0.005481 -0.038149  1.137185  ...  0.001292  0.000806 -0.005481   
1     -0.001574  0.014610  1.121511  ...  0.004262  0.001084 -0.001574   
2     -0.000065  0.0451