In [193]:
import torch
from torch.autograd import Variable
from torch.utils.data import DataLoader, TensorDataset
from torch import autograd

from collections import OrderedDict

import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from pyDOE import lhs
from scipy.stats import uniform,norm


import os
import re


os.chdir("/home/s2113174/Projects-1")

#np.random.seed(1234)

# CUDA for PyTorch
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if use_cuda else "cpu")

In [194]:
# Deep neural network
class DNN(torch.nn.Module):
    def __init__(self, layers):
        super(DNN, self).__init__()

        # Number of layers
        self.depth = len(layers) - 1
        
        # Activation Function
        self.activation = torch.nn.Tanh
        
        # The following loop organized the layers of the NN         
        layer_list = list()
        for i in range(self.depth - 1): 
            layer_list.append(
                ('layer_%d' % i, torch.nn.Linear(layers[i], layers[i+1])))
            layer_list.append(('activation_%d' % i, self.activation()))
        layer_list.append(
            ('layer_%d' % (self.depth - 1), torch.nn.Linear(layers[-2], layers[-1]))
        )
        layerDict = OrderedDict(layer_list)
        
        # Deploy layers
        self.layers = torch.nn.Sequential(layerDict)

        # for param in self.parameters():
        #     if len(param.shape) > 1:
        #         torch.nn.init.xavier_normal_(param)

    def forward(self, x):
        out = self.layers(x)
        return out

In [195]:
def test_set(max_space = 2,obs = 1,param = 1 ,mean = 0,std = 0):

    t= np.linspace(0,max_space,obs)

    sol = (param/ (2*np.pi))*np.sin(2*np.pi*t)

    noise_sol_test = sol + np.random.normal(mean,std, len(t))

    return t,noise_sol_test


def data(max_space = 2,obs = 1,param = 1 ,mean = 0,std = 0.1):

    t= np.linspace(0,max_space,obs)

    sol = (param/ (2*np.pi))*np.sin(2*np.pi*t)

    noise_sol_test = sol + np.random.normal(mean,std, len(t))

    x,y = torch.tensor(t).float().reshape(-1,1),torch.tensor(noise_sol_test).float().reshape(-1,1)
    
    X_u_train = TensorDataset(x,y)

    X_u_train = DataLoader(X_u_train,batch_size=obs)

    return X_u_train

In [196]:
t, y = test_set(obs = 1)

layers = [1] + 1*[10] + [1]
model = DNN(layers)
loss = torch.nn.MSELoss(reduction ='mean')

In [197]:
from backpack import backpack, extend
from backpack.extensions import DiagHessian, DiagGGNExact

model_ = extend(model, use_converter=True)
lossfunc_ = extend(loss)

loss_ = lossfunc_(model_(Variable(torch.tensor(t).float().reshape(-1,1),requires_grad=True)), torch.tensor(y).float().reshape(-1,1))

with backpack(DiagHessian(), DiagGGNExact()):
    loss_.backward()

for name, param in model_.named_parameters():
    print(name)
    print(".diag_ggn_exact.shape:   ", param.diag_ggn_exact)


layers.layer_0.weight
.diag_ggn_exact.shape:    tensor([[0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.]])
layers.layer_0.bias
.diag_ggn_exact.shape:    tensor([0.0121, 0.0041, 0.0260, 0.0631, 0.0008, 0.0282, 0.0011, 0.0573, 0.0064,
        0.0263])
layers.layer_1.weight
.diag_ggn_exact.shape:    tensor([[5.2894e-02, 5.7157e-01, 1.1552e-04, 7.4106e-01, 6.5256e-01, 1.1698e-01,
         1.1451e+00, 6.2839e-01, 1.0056e+00, 1.2434e-01]])
layers.layer_1.bias
.diag_ggn_exact.shape:    tensor([2.0000])


In [198]:
from laplace import Laplace

la = Laplace(model, 'regression', subset_of_weights='last_layer', hessian_structure='diag')

dta = data(obs = 1)

la.fit(dta)

la.H

tensor([2.6447e-02, 2.8579e-01, 5.7762e-05, 3.7053e-01, 3.2628e-01, 5.8490e-02,
        5.7253e-01, 3.1420e-01, 5.0280e-01, 6.2172e-02, 1.0000e+00])

In [199]:
for name, module in model.named_modules():
    print(name)



layers
layers.layer_0
layers.activation_0
layers.layer_1


In [200]:
# a dict to store the activations
forw_activation,back_activation = {},{}
def forw_getActivation(name):
  # the hook signature
  def hook(model, input, output):
    forw_activation[name] = output.detach()
  return hook

def back_getActivation(name):
  # the hook signature
  def hook(model, input, output):
    back_activation[name] = output[0].detach()
  return hook

In [201]:
h1 = model.layers[1].register_forward_hook(forw_getActivation('layers.activation_0'))
h2 = model.layers[2].register_forward_hook(forw_getActivation('layers.layer_1'))
h3 = model.layers[0].register_forward_hook(forw_getActivation('layers.layer_0'))

b_h1 = model.layers[1].register_full_backward_hook(back_getActivation('layers.activation_0'))
b_h2 = model.layers[2].register_full_backward_hook(back_getActivation('layers.layer_1'))

In [202]:
t = torch.tensor(t).float().reshape(-1,1)
#y_ = model(t)

# forward pass using the functional API
# to take the parameters as input arguments
def make_functional_fwd(_model):
    def fn(data, parameters):
        return torch.func.functional_call(_model, parameters, (data,))
    return fn

model_func = make_functional_fwd(model) # functional forward
params = dict(model.named_parameters())

# the `argnums` argument allows to select with
# respect to which input argument of the functional forward
# pass defined in the closure
grad_params = torch.func.grad(model_func, argnums=1)(t[0], params)

# as before but for computing the gradient with
# respect to the input data
grad_x = torch.func.grad(model_func, argnums=0)(t[0], params)

RuntimeError: In order to use an autograd.Function with functorch transforms (vmap, grad, jvp, jacrev, ...), it must override the setup_context staticmethod. For more details, please see https://pytorch.org/docs/master/notes/extending.func.html

In [None]:
t = Variable(torch.tensor(t).float().reshape(-1,1),requires_grad=True)
y_ = model(t)

u_t = torch.autograd.grad(
    y_, t, 
    grad_outputs=torch.ones_like(y_),
    retain_graph=True,
    create_graph=True,
    allow_unused = True,
)[0]

f = u_t 

Loss = loss(f,torch.zeros_like(f))

In [None]:
torch.func.grad(y_,argnums=(0))(t)

TypeError: 'Tensor' object is not callable

In [None]:
u_t(t)

TypeError: 'Tensor' object is not callable

In [None]:
#Loss.backward(retain_graph=True)

In [None]:
h1.remove()
h2.remove()
h3.remove()

b_h1.remove()
b_h2.remove()

In [None]:
2*f

tensor([[-0.4089]], grad_fn=<MulBackward0>)

In [None]:
autograd.grad(y_, t, retain_graph=True)[0]

tensor([[-0.2044]])

In [None]:
2*f*autograd.grad(f, t, retain_graph=True)[0]

tensor([[-0.0797]], grad_fn=<MulBackward0>)

In [None]:
2*f*autograd.grad(f, t, retain_graph=True)[0] / autograd.grad(y_, t, retain_graph=True)[0]

tensor([[0.3896]], grad_fn=<DivBackward0>)

In [None]:
Loss

tensor(0.0418, grad_fn=<BackwardHookFunctionBackward>)

In [None]:
forw_activation

{'layers.layer_0': tensor([[-0.9378, -0.6547, -0.6665, -0.8461,  0.7228, -0.6660, -0.3661,  0.2303,
          -0.5328, -0.2058]]),
 'layers.activation_0': tensor([[-0.7342, -0.5748, -0.5827, -0.6890,  0.6186, -0.5823, -0.3505,  0.2263,
          -0.4875, -0.2029]]),
 'layers.layer_1': tensor([[-0.2477]])}

In [None]:
back_activation

{'layers.layer_1': tensor([[1.]]),
 'layers.activation_0': tensor([[ 0.1353,  0.3077, -0.0403, -0.2446, -0.1151,  0.3147, -0.1230, -0.0968,
           0.1429, -0.0885]])}

In [None]:

first_derivative = autograd.grad(Loss, t, retain_graph=True,create_graph=True)[0]/ autograd.grad(y_, t, retain_graph=True)[0]
#second_derivative = autograd.grad(first_derivative, y_)[0]
print(first_derivative)

tensor([[0.3896]], grad_fn=<DivBackward0>)


In [None]:
model.layers[-1].bias

Parameter containing:
tensor([0.1213], requires_grad=True)

In [None]:
model

DNN(
  (layers): Sequential(
    (layer_0): Linear(in_features=1, out_features=10, bias=True)
    (activation_0): Tanh()
    (layer_1): Linear(in_features=10, out_features=1, bias=True)
  )
)

In [None]:
print(2*torch.sqrt(Loss))

tensor(0.9666, grad_fn=<MulBackward0>)


In [None]:
y_

tensor([[-0.4833]], grad_fn=<BackwardHookFunctionBackward>)

In [None]:
forw_activation['layers.layer_1']

tensor([[-0.4833]])

In [None]:
#torch.dot(model.layers[2].weight.flatten(),forw_activation['layers.activation_0'].flatten())+ model.layers[2].bias.flatten()

In [None]:
#grad_f = back_activation['layers.layer_1']*(forw_activation['layers.activation_0'])
grad_f = (forw_activation['layers.activation_0'])

In [None]:
grad_f

tensor([[ 0.6889,  0.4813, -0.3163, -0.4149,  0.5590, -0.5403,  0.3087, -0.3865,
          0.2883,  0.1710]])

In [None]:
torch.sum(grad_f*grad_f,axis=0)

tensor([0.4746, 0.2317, 0.1001, 0.1722, 0.3125, 0.2920, 0.0953, 0.1494, 0.0831,
        0.0293])

In [None]:
print(forw_activation)

{'layers.layer_0': tensor([[ 0.8459,  0.5247, -0.3275, -0.4415,  0.6314, -0.6046,  0.3191, -0.4077,
          0.2967,  0.1727]]), 'layers.activation_0': tensor([[ 0.6889,  0.4813, -0.3163, -0.4149,  0.5590, -0.5403,  0.3087, -0.3865,
          0.2883,  0.1710]]), 'layers.layer_1': tensor([[-0.4833]])}


In [None]:
a_1 = forw_activation["layers.activation_0"]
g = back_activation["layers.layer_1"]

KeyError: 'layers.layer_1'

In [None]:
print(g)

tensor([[-0.0350]])


In [None]:
aa= torch.dot(a_1.reshape(-1),a_1.reshape(-1).T)

gg = torch.dot(g.reshape(-1),g.reshape(-1).T)


print(aa,gg)

tensor(2.2739) tensor(0.0012)


  aa= torch.dot(a_1.reshape(-1),a_1.reshape(-1).T)


In [None]:
back_activation

{'layers.layer_1': tensor([[-0.0350]]),
 'layers.activation_0': tensor([[-0.0027,  0.0046, -0.0093, -0.0023,  0.0032,  0.0069,  0.0044,  0.0065,
           0.0086, -0.0018]])}