In [1]:
import torch
from torch.autograd import Variable
from torch.utils.data import DataLoader, TensorDataset

from collections import OrderedDict

import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from pyDOE import lhs
from scipy.stats import uniform,norm


import os
import re


os.chdir("/home/s2113174/Projects-1")

#np.random.seed(1234)

# CUDA for PyTorch
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if use_cuda else "cpu")

In [2]:
# Deep neural network
class DNN(torch.nn.Module):
    def __init__(self, layers):
        super(DNN, self).__init__()

        # Number of layers
        self.depth = len(layers) - 1
        
        # Activation Function
        self.activation = torch.nn.Tanh
        
        # The following loop organized the layers of the NN         
        layer_list = list()
        for i in range(self.depth - 1): 
            layer_list.append(
                ('layer_%d' % i, torch.nn.Linear(layers[i], layers[i+1])))
            layer_list.append(('activation_%d' % i, self.activation()))
        layer_list.append(
            ('layer_%d' % (self.depth - 1), torch.nn.Linear(layers[-2], layers[-1]))
        )
        layerDict = OrderedDict(layer_list)
        
        # Deploy layers
        self.layers = torch.nn.Sequential(layerDict)

        # for param in self.parameters():
        #     if len(param.shape) > 1:
        #         torch.nn.init.xavier_normal_(param)

    def forward(self, x):
        out = self.layers(x)
        return out

In [3]:
def test_set(max_space = 2,obs = 1,param = 1 ,mean = 0,std = 0):

    t= np.linspace(0,max_space,obs)

    sol = (param/ (2*np.pi))*np.sin(2*np.pi*t)

    noise_sol_test = sol + np.random.normal(mean,std, len(t))

    return t,noise_sol_test


def data(max_space = 2,obs = 1,param = 1 ,mean = 0,std = 0.1):

    t= np.linspace(0,max_space,obs)

    sol = (param/ (2*np.pi))*np.sin(2*np.pi*t)

    noise_sol_test = sol + np.random.normal(mean,std, len(t))

    x,y = torch.tensor(t).float().reshape(-1,1),torch.tensor(noise_sol_test).float().reshape(-1,1)
    
    X_u_train = TensorDataset(x,y)

    X_u_train = DataLoader(X_u_train,batch_size=obs)

    return X_u_train

In [4]:
t, y = test_set(obs = 1)

layers = [1] + 1*[10] + [1]
model = DNN(layers)
loss = torch.nn.MSELoss(reduction ='mean')

In [5]:
from backpack import backpack, extend
from backpack.extensions import DiagHessian, DiagGGNExact

model_ = extend(model, use_converter=True)
lossfunc_ = extend(loss)

loss_ = lossfunc_(model_(Variable(torch.tensor(t).float().reshape(-1,1),requires_grad=True)), torch.tensor(y).float().reshape(-1,1))

with backpack(DiagHessian(), DiagGGNExact()):
    loss_.backward()

for name, param in model_.named_parameters():
    print(name)
    print(".diag_ggn_exact.shape:   ", param.diag_ggn_exact)


layers.layer_0.weight
.diag_ggn_exact.shape:    tensor([[0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.]])
layers.layer_0.bias
.diag_ggn_exact.shape:    tensor([0.0398, 0.0431, 0.0430, 0.0165, 0.0780, 0.0144, 0.0238, 0.1425, 0.0424,
        0.1130])
layers.layer_1.weight
.diag_ggn_exact.shape:    tensor([[0.9492, 0.4634, 0.2001, 0.3443, 0.6250, 0.5839, 0.1906, 0.2988, 0.1662,
         0.0585]])
layers.layer_1.bias
.diag_ggn_exact.shape:    tensor([2.0000])


In [6]:
from laplace import Laplace

la = Laplace(model, 'regression', subset_of_weights='last_layer', hessian_structure='diag')

dta = data(obs = 1)

la.fit(dta)

la.H

tensor([0.4746, 0.2317, 0.1001, 0.1722, 0.3125, 0.2920, 0.0953, 0.1494, 0.0831,
        0.0293, 1.0000])

(tensor([[-0.4833]]), tensor([[[2.0340]]]))

In [7]:
for name, module in model.named_modules():
    print(name)



layers
layers.layer_0
layers.activation_0
layers.layer_1


In [8]:
# a dict to store the activations
forw_activation,back_activation = {},{}
def forw_getActivation(name):
  # the hook signature
  def hook(model, input, output):
    forw_activation[name] = output.detach()
  return hook

def back_getActivation(name):
  # the hook signature
  def hook(model, input, output):
    back_activation[name] = output[0].detach()
  return hook

In [9]:
h1 = model.layers[1].register_forward_hook(forw_getActivation('layers.activation_0'))
h2 = model.layers[2].register_forward_hook(forw_getActivation('layers.layer_1'))
h3 = model.layers[0].register_forward_hook(forw_getActivation('layers.layer_0'))

b_h1 = model.layers[1].register_full_backward_hook(back_getActivation('layers.activation_0'))
b_h2 = model.layers[2].register_full_backward_hook(back_getActivation('layers.layer_1'))

In [10]:
t = Variable(torch.tensor(t).float().reshape(-1,1),requires_grad=True)
y_ = model(t)


Loss = loss(y_,torch.tensor(y).float().reshape(-1,1))

In [11]:
#Loss.backward(create_graph=True)

In [12]:
h1.remove()
h2.remove()
h3.remove()

b_h1.remove()
b_h2.remove()

In [13]:
from torch import autograd

first_derivative = autograd.grad(Loss, y_, create_graph=True)[0]
second_derivative = autograd.grad(first_derivative, y_)[0]

In [14]:
print(second_derivative)

tensor([[2.]])


In [15]:
model

DNN(
  (layers): Sequential(
    (layer_0): Linear(in_features=1, out_features=10, bias=True)
    (activation_0): Tanh()
    (layer_1): Linear(in_features=10, out_features=1, bias=True)
  )
)

In [16]:
print(2*torch.sqrt(Loss))

tensor(0.9666, grad_fn=<MulBackward0>)


In [17]:
y_

tensor([[-0.4833]], grad_fn=<BackwardHookFunctionBackward>)

In [18]:
forw_activation['layers.layer_1']

tensor([[-0.4833]])

In [19]:
#torch.dot(model.layers[2].weight.flatten(),forw_activation['layers.activation_0'].flatten())+ model.layers[2].bias.flatten()

In [20]:
#grad_f = back_activation['layers.layer_1']*(forw_activation['layers.activation_0'])
grad_f = (forw_activation['layers.activation_0'])

In [21]:
grad_f

tensor([[ 0.6889,  0.4813, -0.3163, -0.4149,  0.5590, -0.5403,  0.3087, -0.3865,
          0.2883,  0.1710]])

In [22]:
torch.sum(grad_f*grad_f,axis=0)

tensor([0.4746, 0.2317, 0.1001, 0.1722, 0.3125, 0.2920, 0.0953, 0.1494, 0.0831,
        0.0293])

In [23]:
print(forw_activation)

{'layers.layer_0': tensor([[ 0.8459,  0.5247, -0.3275, -0.4415,  0.6314, -0.6046,  0.3191, -0.4077,
          0.2967,  0.1727]]), 'layers.activation_0': tensor([[ 0.6889,  0.4813, -0.3163, -0.4149,  0.5590, -0.5403,  0.3087, -0.3865,
          0.2883,  0.1710]]), 'layers.layer_1': tensor([[-0.4833]])}


In [24]:
a_1 = forw_activation["layers.activation_0"]
g = back_activation["layers.layer_1"]

KeyError: 'layers.layer_1'

In [None]:
print(g)

tensor([[-0.0350]])


In [None]:
aa= torch.dot(a_1.reshape(-1),a_1.reshape(-1).T)

gg = torch.dot(g.reshape(-1),g.reshape(-1).T)


print(aa,gg)

tensor(2.2739) tensor(0.0012)


  aa= torch.dot(a_1.reshape(-1),a_1.reshape(-1).T)


In [None]:
back_activation

{'layers.layer_1': tensor([[-0.0350]]),
 'layers.activation_0': tensor([[-0.0027,  0.0046, -0.0093, -0.0023,  0.0032,  0.0069,  0.0044,  0.0065,
           0.0086, -0.0018]])}