In [None]:
"""
---------------------------------------------------------------------------------------------------------------------------------------------------------
This document contains a library-based method for calculating the Hessian. The approach works, although I have kept the code of my failed
attempts towards the end of the file.
---------------------------------------------------------------------------------------------------------------------------------------------------------
"""

'\n---------------------------------------------------------------------------------------------------------------------------------------------------------\nThis document contains a library-based method for calculating the Hessian. The approach appears to work, although I have kept the code of my failed\nattempts towards the end of file.\n---------------------------------------------------------------------------------------------------------------------------------------------------------\n'

In [None]:
import torch
from torch import nn
import numpy as np
from torch.nn import Module
import torch.nn.functional as F
import time
import matplotlib.pyplot as plt
import torch.optim as optim

In [None]:
# Use the first CUDA device when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [None]:
class ODEFunc(nn.Module):
    """Two-layer fully connected network (2 -> 50 -> tanh -> 2) applied to the cube of its input.

    Both linear layers are re-initialised after construction: weights are drawn
    from a normal distribution with mean 0 and standard deviation 0.1, and
    biases are set to 0. The four parameter tensors hold 100 + 50 + 100 + 2 = 252
    scalars in total.
    """

    def __init__(self):
        super().__init__()

        hidden_layer = nn.Linear(2, 50)
        output_layer = nn.Linear(50, 2)
        self.net = nn.Sequential(hidden_layer, nn.Tanh(), output_layer)

        # Only the linear layers carry parameters; re-initialise them in forward order.
        for layer in (hidden_layer, output_layer):
            nn.init.normal_(layer.weight, mean=0, std=0.1)
            nn.init.constant_(layer.bias, val=0)

    def forward(self, y):
        """Return the network output for the element-wise cube of `y`."""
        cubed = y**3
        return self.net(cubed)

# Module-level ODEFunc instance. Note that several later cells rebind `func` to a new,
# freshly initialised instance, so its weights depend on which cell ran last.
func = ODEFunc()

In [None]:
"""
------------------------------------------------------------------------------------------------------------------------
This approach works, and produces a 252 x 252 matrix. It is a slightly less broken-down version of the code used below.
------------------------------------------------------------------------------------------------------------------------
"""

def get_loss_square(params_vector):
  """Loss of a 2 -> 50 -> tanh -> 2 network on one fixed sample, as a function of its flat parameters.

  Args:
    params_vector: 1-D tensor with at least 252 entries, laid out as
      [first weight (50 x 2) | first bias (50) | second weight (2 x 50) | second bias (2)].
      Entries beyond index 251 are ignored.

  Returns:
    0-dim tensor holding the Euclidean norm of (target - prediction) for the
    fixed input (1, 1) and the fixed target (2, 0).

  NOTE(review): despite the name, the value returned is the un-squared norm.
  """
  first_weight = params_vector[:100].reshape([50, 2])
  first_bias = params_vector[100:150].reshape([50])
  second_weight = params_vector[150:250].reshape([2, 50])
  second_bias = params_vector[250:252].reshape([2])

  # Create the fixed sample with the dtype/device of the parameters so the function
  # also works for float64 or GPU-resident parameter vectors.
  sample = torch.tensor([1.0, 1.0], dtype=params_vector.dtype, device=params_vector.device)
  target = torch.tensor([2.0, 0.0], dtype=params_vector.dtype, device=params_vector.device)

  hidden = torch.tanh(F.linear(sample, first_weight, first_bias))
  prediction = F.linear(hidden, second_weight, second_bias)

  return torch.linalg.norm(target - prediction)

def get_hessian(net):
  """Hessian of `get_loss_square` with respect to all parameters of `net`, flattened into one vector.

  Args:
    net: object exposing `.parameters()`; the parameters are flattened and
      concatenated in iteration order. `get_loss_square` slices the result as
      [50 x 2 | 50 | 2 x 50 | 2], so `net` must provide at least 252 scalars in that order.

  Returns:
    The tensor returned by `torch.autograd.functional.hessian` (n x n for n flattened parameters).
  """
  # Start from an empty tensor, then append every parameter as a 1-D view.
  pieces = [torch.tensor([])]
  pieces.extend(weights.reshape(-1) for weights in net.parameters())
  flat_params = torch.cat(pieces)

  return torch.autograd.functional.hessian(get_loss_square, flat_params)

In [None]:
"""
------------------------------------------------------------------------------------------------------------------------
This approach works, and produces a 252 x 252 matrix. It is a slightly more broken-down version of the code above.
------------------------------------------------------------------------------------------------------------------------
"""
def net(a, b, c, d):
  """Evaluate a linear -> tanh -> linear network on the fixed input (1, 1).

  Args:
    a: weight of the first linear layer; must accept a 2-element input (e.g. shape [50, 2]).
    b: bias of the first linear layer.
    c: weight of the second linear layer.
    d: bias of the second linear layer.

  Returns:
    The network output `F.linear(tanh(F.linear(input, a, b)), c, d)`.
  """
  # Match the dtype/device of the weights so float64 or GPU parameters work too.
  sample = torch.tensor([1.0, 1.0], dtype=a.dtype, device=a.device)

  hidden = torch.tanh(F.linear(sample, a, b))
  return F.linear(hidden, c, d)


def get_loss_square_1(params_vector):
  """Loss of the fixed-input network `net`, expressed as a function of its flat parameters.

  Args:
    params_vector: 1-D tensor with at least 252 entries, laid out as
      [first weight (50 x 2) | first bias (50) | second weight (2 x 50) | second bias (2)].

  Returns:
    0-dim tensor: Euclidean norm of (target - prediction) with target (2, 0).

  NOTE(review): this calls the module-level `net` with four arguments. A later
  cell redefines `net` with five parameters; once that cell has run, this call
  no longer matches the signature.
  """
  first_weight, first_bias, second_weight, second_bias = (
      params_vector[0:100].reshape([50, 2]),
      params_vector[100:150].reshape([50]),
      params_vector[150:250].reshape([2, 50]),
      params_vector[250:252].reshape([2]),
  )

  target = torch.tensor([2.0, 0.0])
  prediction = net(first_weight, first_bias, second_weight, second_bias)

  residual = target - prediction
  return torch.linalg.norm(residual)

def get_hessian_1(net):
  """Hessian of `get_loss_square_1` with respect to all parameters of `net`, flattened into one vector.

  Args:
    net: object exposing `.parameters()`. Inside this function the name refers to
      this argument, not to the module-level function `net`.

  Returns:
    The tensor returned by `torch.autograd.functional.hessian` (n x n for n flattened parameters).
  """
  # Start from an empty tensor, then append every parameter as a 1-D view.
  pieces = [torch.tensor([])]
  pieces.extend(weights.reshape(-1) for weights in net.parameters())
  flat_params = torch.cat(pieces)

  return torch.autograd.functional.hessian(get_loss_square_1, flat_params)

In [None]:
"""
-------------------------------------------------------------------------------------------------------------------------------------
This approach works, and produces a 252 x 252 matrix. It is built using library functions and an nn.Module, which means 
it can more easily be used in the context of neural ODEs (NODEs). This is the method that I have implemented during the 
training of simple NODEs.
-------------------------------------------------------------------------------------------------------------------------------------
"""

class Network(nn.Module):
  """Linear -> tanh -> linear network whose weights are supplied by the caller.

  The four tensors are stored as ordinary attributes exactly as passed in, so
  the output stays a differentiable function of whatever they were sliced from.

  Args:
    a: weight of the first linear layer.
    b: bias of the first linear layer.
    c: weight of the second linear layer.
    d: bias of the second linear layer.
  """

  def __init__(self, a, b, c, d):
    super().__init__()
    self.a, self.b = a, b
    self.c, self.d = c, d

  def forward(self, y):
    """Return `F.linear(tanh(F.linear(y, a, b)), c, d)`."""
    hidden = nn.Tanh()(F.linear(y, self.a, self.b))
    return F.linear(hidden, self.c, self.d)


def get_loss_square_2(params_vector):
  """Loss of a `Network` built from a flat parameter vector, for one fixed sample.

  Args:
    params_vector: 1-D tensor with at least 252 entries, laid out as
      [first weight (50 x 2) | first bias (50) | second weight (2 x 50) | second bias (2)].

  Returns:
    0-dim tensor: Euclidean norm of (prediction - target) for input (1, 1) and target (2, 0),
    with the sample and target created on the module-level `device`.
  """
  first_weight = params_vector[0:100].reshape([50, 2])
  first_bias = params_vector[100:150].reshape([50])
  second_weight = params_vector[150:250].reshape([2, 50])
  second_bias = params_vector[250:252].reshape([2])

  sample = torch.tensor([1.0, 1.0], device=device)
  target = torch.tensor([2.0, 0.0], device=device)

  model = Network(first_weight, first_bias, second_weight, second_bias).to(device)
  prediction = model(sample)

  return torch.linalg.norm(prediction - target)

def get_library_hessian(net):
  """Hessian of `get_loss_square_2` with respect to all parameters of `net`, flattened into one vector.

  Args:
    net: object exposing `.parameters()`; each parameter is flattened, moved to
      the module-level `device` and concatenated in iteration order.
      `get_loss_square_2` slices the result as [50 x 2 | 50 | 2 x 50 | 2].

  Returns:
    The tensor returned by `torch.autograd.functional.hessian` (n x n for n flattened parameters).
  """
  # Start from an empty tensor on the target device, then append each flattened parameter.
  pieces = [torch.tensor([]).to(device)]
  pieces.extend(weights.reshape(-1).to(device) for weights in net.parameters())
  flat_params = torch.cat(pieces)

  return torch.autograd.functional.hessian(get_loss_square_2, flat_params)

In [None]:
# Build a freshly initialised network and compute the Hessian of the fixed-sample loss
# with respect to its flattened parameters.
# NOTE(review): the saved output of this cell is a NameError, which suggests the cells
# defining ODEFunc / get_library_hessian / device had not been run in that session --
# re-run the notebook from the top and confirm.
func = ODEFunc()
hessian = get_library_hessian(func)


NameError: ignored

In [None]:
"""
------------------------------------------------------------------------------------------------------------------------
All of the below are my failed attempts!
------------------------------------------------------------------------------------------------------------------------
"""

In [None]:
"""
---------------------------------------------------------------------------------------------------------------------------
This is my attempted adaptation of the above code to use an nn.Module, so that it can be used with the adjoint 
method for odeint(). It (erroneously) produces a matrix of zeros.
---------------------------------------------------------------------------------------------------------------------------
"""

def net(input, a, b, c, d):
  """Evaluate a linear -> tanh -> linear network on the element-wise cube of `input`.

  Args:
    input: tensor fed to the network after cubing.
    a: weight of the first linear layer.
    b: bias of the first linear layer.
    c: weight of the second linear layer.
    d: bias of the second linear layer.

  Returns:
    `F.linear(tanh(F.linear(input**3, a, b)), c, d)`.
  """
  hidden = nn.Tanh()(F.linear(input**3, a, b))
  return F.linear(hidden, c, d)

class Network(nn.Module):
  """Module wrapper around the function `net` with caller-supplied weights.

  `forward` takes `(t, y)`; `t` is accepted but not used in the computation.

  Args:
    a: weight of the first linear layer.
    b: bias of the first linear layer.
    c: weight of the second linear layer.
    d: bias of the second linear layer.
  """

  def __init__(self, a, b, c, d):
    super().__init__()
    self.a, self.b, self.c, self.d = a, b, c, d

  def forward(self, t, y):
    """Return `net(y, a, b, c, d)`; the argument `t` is ignored."""
    return net(y, self.a, self.b, self.c, self.d)

def get_loss_square(params_vector):
  """Loss of an ODE solve driven by a `Network` built from a flat parameter vector.

  Args:
    params_vector: 1-D tensor with at least 252 entries, laid out as
      [first weight (50 x 2) | first bias (50) | second weight (2 x 50) | second bias (2)].

  Returns:
    0-dim tensor: norm of (true_y - pred_y), where pred_y is the result of `odeint`.

  NOTE(review): `odeint`, `true_y0` and `t` are not defined or imported anywhere in
  the visible notebook, so this function raises NameError unless they exist in the
  kernel. The local `true_y` is defined here but `true_y0` is what is passed to
  `odeint` -- confirm which was intended.
  NOTE(review): a, b, c, d are stored on `Network` as plain tensors. If `odeint` is
  an adjoint-based solver that only propagates gradients to the module's registered
  parameters, that could explain the all-zero Hessian reported for this cell --
  TODO confirm against the solver's documentation.
  """

  # Slice the flat vector back into the four weight/bias tensors.
  a = params_vector[:100].reshape([50, 2]).to(device)
  b = params_vector[100:150].reshape([50]).to(device)
  c = params_vector[150:250].reshape([2, 50]).to(device)
  d = params_vector[250:252].reshape([2]).to(device)

  true_y = torch.tensor([2.0,2.0]).to(device)
  neural_net = Network(a,b,c,d)
  # presumably torchdiffeq's odeint(func, y0, t); verify the import in the full notebook
  pred_y = odeint(neural_net, true_y0, t)

  # true_y has 2 elements; it is broadcast against whatever shape odeint returns.
  loss = torch.linalg.norm(true_y-pred_y)
  return loss

def get_hessian(net):
  """Hessian of `get_loss_square` with respect to all parameters of `net`, flattened into one vector.

  Args:
    net: object exposing `.parameters()`; each parameter is flattened, moved to
      the module-level `device` and concatenated in iteration order.

  Returns:
    The tensor returned by `torch.autograd.functional.hessian` (n x n for n flattened parameters).
  """
  # Start from an empty tensor on the target device, then append each flattened parameter.
  pieces = [torch.tensor([]).to(device)]
  pieces.extend(weights.reshape(-1).to(device) for weights in net.parameters())
  flat_params = torch.cat(pieces).to(device)

  return torch.autograd.functional.hessian(get_loss_square, flat_params)

# Last expression of the cell, so the notebook displays the returned Hessian.
# Uses whichever `func` is currently bound at module level.
get_hessian(func)

In [None]:
"""
------------------------------------------------------------------------------------------------------------------------
This approach seems to work fine, but I don't yet understand how to interpret the outcome.
------------------------------------------------------------------------------------------------------------------------
"""

def get_loss(w, x, y, z):
  """
  Calculates the loss by taking the network parameters as inputs.

  Inputs: w, x, y, z: these are torch.tensor() objects of shape [50, 2], [50], [2, 50] and [2] respectively
  (first-layer weight, first-layer bias, second-layer weight, second-layer bias).

  Returns: a 0-dim tensor holding the Euclidean norm of (target - output) for the fixed
  network input (1, 1) and the fixed target (2, 0).
  """

  # Fixed network input and target, created with the dtype/device of the weights so that
  # float64 or GPU-resident parameters work as well.
  sample = torch.tensor([1.0, 1.0], dtype=w.dtype, device=w.device)
  target = torch.tensor([2.0, 0.0], dtype=w.dtype, device=w.device)

  # Calculate the network output: linear -> tanh -> linear.
  hidden = torch.tanh(F.linear(sample, w, x))
  output = F.linear(hidden, y, z)

  # Obtain the loss.
  return torch.linalg.norm(target - output)

In [None]:
if __name__ == '__main__':

  # functional.hessian() is given the parameters of `func` as a tuple of tensors, one per
  # positional argument of get_loss(); the result is a nested tuple with one block per
  # pair of inputs.
  inputs = tuple(func.parameters())
  hessians = torch.autograd.functional.hessian(get_loss, inputs)

  # The string below is disabled inspection code kept for reference; it has no effect.
  """
  param_names = ('w', 'x', 'y', 'z')
  for d_name, d_hessians in zip(param_names, hessians):
    for dd_name, dd_hessian in zip(param_names, d_hessians):
        print(f'dl/d{dd_name}d{d_name} = ' + str(dd_hessian.shape))
        #print(f'dl/d{dd_name}d{d_name} = \n{dd_hessian}\n')
  """

In [None]:
"""
------------------------------------------------------------------------------------------------------------------------
This approach uses nn.Module parameters and does not seem to work; the Hessian obtained is simply a matrix of zeroes.
More detail on this can perhaps be found here:
https://discuss.pytorch.org/t/using-autograd-functional-jacobian-hessian-with-respect-to-nn-module-parameters/103994
------------------------------------------------------------------------------------------------------------------------
"""

# Fresh network whose parameter shapes drive the slicing inside loss().
func = ODEFunc()
param_tensors = nn.ParameterList(func.parameters())

# Flatten every parameter and concatenate them into a single 1-D vector.
params_vector = torch.tensor([])
for param in param_tensors:
  vec = torch.reshape(param, (-1,))
  params_vector = torch.cat((params_vector, vec))

def loss(params_vector):
  """Rebuild per-layer parameters from `params_vector` and return a norm loss on random data.

  The flat vector is consumed front to back, taking numel() entries for each
  parameter of the module-level `func` and reshaping them to that parameter's shape.
  """
  parameters_list = nn.ParameterList([])
  for param in func.parameters():
    nels = torch.numel(param)
    elements = params_vector[:nels]
    elements = torch.reshape(elements, param.shape)
    # NOTE(review): wrapping the slice in nn.Parameter creates a new leaf tensor, which
    # cuts the autograd link back to params_vector -- very likely the reason the Hessian
    # comes out as all zeros (see the forum thread linked in the cell header).
    elements = torch.nn.parameter.Parameter(elements)

    parameters_list.append(elements)
    # Drop the entries just consumed so the next parameter starts at index 0.
    params_vector = params_vector[nels:]

  # NOTE(review): ODEFunc.__init__ as defined earlier in this notebook takes no arguments,
  # so with that definition this call raises TypeError; presumably this cell was written
  # against a different ODEFunc -- confirm.
  net = ODEFunc(parameters_list) 
  # Input and target are redrawn at random on every call, so the loss value is not
  # reproducible between calls.
  input = torch.rand(2,2)
  output = net(input)
  target = torch.rand(2,2)
  return torch.linalg.norm(target - output)

hessian = torch.autograd.functional.hessian(loss, params_vector)
print(hessian)

#parameters = torch.tensor([])
#for param in func.parameters():
  #params = torch.reshape(param.data, (-1,))  #Reshapes the gradient to a 1D vector.
  #parameters = torch.cat((params, parameters), 0) 

In [None]:
"""
------------------------------------------------------------------------------------------------------------------------
This has the same problem as above.
------------------------------------------------------------------------------------------------------------------------
"""

# Network whose parameters are flattened into params_vector below.
func = ODEFunc()
param_tensors = func.parameters()

# Flatten every parameter and concatenate them into a single 1-D vector.
params_vector = torch.tensor([])
for param in param_tensors:
  vec = torch.reshape(param, (-1,))
  params_vector = torch.cat((params_vector, vec))

# NOTE(review): this list lives at module level and get_loss() appends to it on every
# call, while only indices 0-3 are ever read -- so every call after the first still reads
# the tensors appended by the first call.
parameters_list = []
# `func` is rebound to a second, freshly initialised instance here; params_vector above
# was built from the first instance. Only the parameter shapes of this one are used below.
func = ODEFunc()

def get_loss(params_vector):
  """Rebuild the four layer tensors from `params_vector` and return a norm loss.

  The flat vector is consumed front to back, taking numel() entries for each
  parameter of the module-level `func` and reshaping them to that parameter's shape.
  """

  for param in func.parameters():
    nels = torch.numel(param)
    elements = params_vector[:nels]
    elements = torch.reshape(elements, param.shape)
    # NOTE(review): wrapping the slice in nn.Parameter creates a new leaf tensor, which
    # cuts the autograd link back to params_vector -- very likely the cause of the
    # all-zero Hessian this cell is reported to share with the previous attempt.
    elements = torch.nn.parameter.Parameter(elements)

    parameters_list.append(elements)
    # Drop the entries just consumed so the next parameter starts at index 0.
    params_vector = params_vector[nels:]

  # First-layer weight/bias and second-layer weight/bias, in parameter order.
  a = parameters_list[0]
  b = parameters_list[1]
  c = parameters_list[2]
  d = parameters_list[3]
  
  # The input is redrawn at random on every call, so the loss is not reproducible.
  input = torch.rand(2)
  # Integer literals give an integer tensor; the subtraction below relies on type promotion.
  y = torch.tensor([2,0])

  x = F.linear(input, a, b)
  m = nn.Tanh()
  x = m(x)
  x = F.linear(x, c, d)

  loss = torch.linalg.norm(y-x)
  return loss

loss = get_loss(params_vector)
# NOTE(review): `get_loss_1` is not defined anywhere in the visible notebook, so this line
# raises NameError unless it exists in the kernel; `get_loss` was presumably intended -- confirm.
hessian = torch.autograd.functional.hessian(get_loss_1, params_vector)
print(hessian)

