In [1]:
! pip install pyhessian



In [6]:
import torch
from pyhessian import hessian
from pyhessian.utils import group_product, group_add, normalization, get_params_grad, hessian_vector_product, orthnormal

In [1]:
import torch
from typing import List, Dict, Any

class SimpleCNN(torch.nn.Module):
    def __init__(self, layers_dim: List[int]):
        super().__init__()
        self.blocks = torch.nn.ModuleList([
            torch.nn.Sequential(torch.nn.Conv2d(layer_dim1, layer_dim2, 3, padding=1),
                                torch.nn.ReLU(),
                                torch.nn.Conv2d(layer_dim2, layer_dim2, 3, padding=1, stride=2),
                                torch.nn.ReLU(),
                                # torch.nn.MaxPool2d(2, 2)
                                )
            for layer_dim1, layer_dim2 in zip(layers_dim[:-3], layers_dim[1:-2])
        ])
        # flatten_dim = infer_flatten_dim(conv_params, layers_dim[-3])
        # napisz wnioskowanie spłaszczonego wymiaru
        self.final_layer = torch.nn.Sequential(torch.nn.Linear(4096, layers_dim[-2]), torch.nn.ReLU(),
                                               torch.nn.Linear(layers_dim[-2], layers_dim[-1]))

    def forward(self, x):
        for block in self.blocks:
            x = block(x)
        x = x.flatten(start_dim=1)
        x = self.final_layer(x)
        return x
    

class MLP(torch.nn.Module):
    def __init__(self, layers_dim):
        super().__init__()
        self.layers = torch.nn.ModuleList([
            torch.nn.Sequential(torch.nn.Linear(hidden_dim1, hidden_dim2), torch.nn.ReLU())
            for hidden_dim1, hidden_dim2 in zip(layers_dim[:-2], layers_dim[1:-1])
        ])
        self.final_layer = torch.nn.Linear(layers_dim[-2], layers_dim[-1])

    def forward(self, x):
        x = x.flatten(start_dim=1)
        for layer in self.layers:
            x = layer(x)
        x = self.final_layer(x)
        return x

In [7]:
model = SimpleCNN([3, 64, 128, 128, 128, 10])

In [8]:
model

SimpleCNN(
  (blocks): ModuleList(
    (0): Sequential(
      (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): ReLU()
      (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (3): ReLU()
    )
    (1): Sequential(
      (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): ReLU()
      (2): Conv2d(128, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (3): ReLU()
    )
    (2): Sequential(
      (0): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): ReLU()
      (2): Conv2d(128, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (3): ReLU()
    )
  )
  (final_layer): Sequential(
    (0): Linear(in_features=4096, out_features=128, bias=True)
    (1): ReLU()
    (2): Linear(in_features=128, out_features=10, bias=True)
  )
)

In [8]:
import os

import torch
import torchvision
import torchvision.transforms as transforms

transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

batch_size = 4096

trainset = torchvision.datasets.CIFAR10(root=os.environ['CIFAR10_PATH'], train=False,
                                        download=False, transform=transform)
testloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=False, num_workers=8)

model = SimpleCNN([3, 32, 64, 128, 10])

inputs, targets = next(iter(testloader))
criterion = torch.nn.CrossEntropyLoss()

In [9]:
hessian_comp = hessian(model, criterion, dataloader=testloader, cuda=False)

In [13]:
top_eigenvalues, _ = hessian_comp.eigenvalues(5)
top_eigenvalues



[-0.2514059543609619]

In [41]:
trace = hessian_comp.trace()
trace

[-0.13655626773834229, -9.76462459564209]

In [13]:
inputs.shape

torch.Size([512, 3, 32, 32])

In [5]:
hessian_comp = hessian(
            model, criterion=criterion, dataloader=data, cuda=False
        )
top_eigenvalues, _ = hessian_comp.eigenvalues()
trace = hessian_comp.trace()

ValueError: too many values to unpack (expected 2)

In [None]:
hessian()

### From pytorch

In [33]:
import torch.autograd.functional as F

In [34]:
def compute_loss(inputs, targets):
    outputs = model(inputs)
    loss = F.cross_entropy(outputs, targets)
    return loss

In [38]:
F.hessian(compute_loss, (inputs, targets))

RuntimeError: only Tensors of floating point dtype can require gradients

# Eigenthings

In [16]:
!pip install --upgrade git+https://github.com/noahgolmant/pytorch-hessian-eigenthings.git@master#egg=hessian-eigenthings

Collecting hessian-eigenthings
  Cloning https://github.com/noahgolmant/pytorch-hessian-eigenthings.git (to revision master) to /tmp/pip-install-25m0sk0w/hessian-eigenthings_3238bbb9fe6b47778d3bf674cebe7273
  Running command git clone --filter=blob:none --quiet https://github.com/noahgolmant/pytorch-hessian-eigenthings.git /tmp/pip-install-25m0sk0w/hessian-eigenthings_3238bbb9fe6b47778d3bf674cebe7273
  Resolved https://github.com/noahgolmant/pytorch-hessian-eigenthings.git to commit dce2e54a19963b0dfa41b93f531fb7742d46ea04
  Preparing metadata (setup.py) ... [?25ldone
Collecting scipy>=1.2.1
  Downloading scipy-1.10.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (34.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m34.4/34.4 MB[0m [31m56.4 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Building wheels for collected packages: hessian-eigenthings
  Building wheel for hessian-eigenthings (setup.py) ... [?25ldone
[?25h  Created wheel for hessian-eige

In [17]:
from hessian_eigenthings import compute_hessian_eigenthings

In [19]:
eigenvals, eigenvecs = compute_hessian_eigenthings(model, testloader,
                                                   criterion, 1,use_gpu=False)



In [20]:
eigenvals

array([-0.11508816])

In [22]:
eigenvals, eigenvecs = compute_hessian_eigenthings(model, testloader,
                                                   criterion, 1,use_gpu=False, mode='lanczos')

KeyboardInterrupt: 