In [3]:
import numpy as np
import scipy as sp
import torch
import functorch


In [35]:
# Set size of matrix
N = 2000

# Prescribed spectrum: exactly one negative eigenvalue, all others positive.
eigenvalues = np.concatenate([
    [-np.random.rand()],           # one eigenvalue drawn from (-1, 0]
    np.random.rand(N-1) + 1       # N-1 eigenvalues drawn from [1, 2)
])

# Random orthogonal matrix for the similarity transformation.
# The QR result is indexed rather than unpacked into `_`, because IPython
# uses `_` for the previous cell output and the R factor is not needed.
Q = np.linalg.qr(np.random.randn(N, N))[0]

# Build A = Q * D * Q^T where D is the diagonal matrix of eigenvalues.
D = np.diag(eigenvalues)
A = Q @ D @ Q.T

# Floating-point round-off leaves Q @ D @ Q.T symmetric only approximately.
# Averaging with the transpose makes A exactly symmetric, which is what the
# symmetric eigensolvers used on it assume.
A = (A + A.T) / 2

# Verify the spectrum. eigvalsh is the solver for symmetric matrices: it
# returns real eigenvalues in ascending order, so entry 0 is the minimum.
min_eigenvalue = np.linalg.eigvalsh(A)[0]
print("Eigenvalues:", min_eigenvalue)

Eigenvalues: -0.6057816666140736


In [37]:
# A is built as Q @ D @ Q.T and is therefore real symmetric, so use the
# symmetric ARPACK driver (eigsh): it returns real eigenvalues directly, with
# no complex result to strip via `.real`.
# which='SA' requests the two algebraically smallest eigenvalues; 'SM' would
# select by smallest magnitude, which only coincides for this spectrum.
sp.sparse.linalg.eigsh(A, k=2, which='SA', return_eigenvectors=False)

array([ 1.00106947, -0.60578167])

In [38]:
# Convert A to a torch tensor. torch.as_tensor keeps the dtype, reuses the
# NumPy buffer instead of copying it, and returns A unchanged if this cell is
# re-run after A is already a tensor (torch.tensor would warn in that case).
# NOTE: the tensor shares memory with the original NumPy array.
A = torch.as_tensor(A)

In [None]:
# Run LOBPCG for k=2 eigenpairs at the low end of the spectrum
# (largest=False) and display only the eigenvalues (first returned item).
lobpcg_eigvals = torch.lobpcg(A, k=2, largest=False)[0]
print(lobpcg_eigvals)

tensor([-0.6058,  1.0011], dtype=torch.float64)


In [1]:
from hessian_eigenthings import hvp_operator

In [76]:
class MLP(torch.nn.Module):
    """Small fully connected network mapping 6 input features to 1 output.

    Two hidden layers (32 and 16 units) use tanh activations; the output
    layer is purely linear.
    """

    def __init__(self):
        super().__init__()
        nn = torch.nn
        self.layer1 = nn.Linear(6, 32)
        self.layer2 = nn.Linear(32, 16)
        self.layer3 = nn.Linear(16, 1)
        self.tanh = nn.Tanh()

    def forward(self, x):
        """Return the network output for ``x`` (last dimension of size 6)."""
        hidden = x
        for layer in (self.layer1, self.layer2):
            hidden = self.tanh(layer(hidden))
        # The final layer is left without an activation.
        return self.layer3(hidden)

# Instantiate the network.
model = MLP()

# Draw one unbatched sample with 6 features.
test_input = torch.randn(6)
print("Input shape:", test_input.shape)

# Forward pass through the plain module.
output = model(test_input)
print("Output shape:", output.shape)
print("Output values:", output.squeeze())

# Wrap the module with torch.func.functionalize and evaluate the wrapped
# callable on the same input for comparison with the value printed above.
model_fn = torch.func.functionalize(model)
print("Output values:", model_fn(test_input))

Input shape: torch.Size([6])
Output shape: torch.Size([1])
Output values: tensor(-0.0772, grad_fn=<SqueezeBackward0>)
Output values: tensor([-0.0772], grad_fn=<SqueezeBackward1>)


In [None]:
# Random direction vector with the same size as the 6-feature input.
v = torch.randn(6)


def hvp_fn(s):
    """Call torch.autograd.functional.hvp for model_fn at test_input with vector ``s``.

    Returns whatever ``torch.autograd.functional.hvp`` returns for these
    arguments; ``model_fn`` and ``test_input`` are looked up when called.
    """
    return torch.autograd.functional.hvp(model_fn, test_input, s)


print(hvp_fn(v))


RuntimeError: v has invalid size: should be torch.Size([6]) but got torch.Size([4, 6]).

In [55]:
# Show the shape of the wrapped model's output for the current test_input.
model_fn(test_input).size()

torch.Size([4, 1])

In [79]:
# Fresh random input that tracks gradients.
test_input = torch.randn(6, requires_grad=True)

# Reduce the model output over its last dimension, then differentiate the
# result with respect to the input.
summed_output = model_fn(test_input).sum(-1)
torch.autograd.grad(outputs=summed_output, inputs=test_input)

(tensor([-0.0337,  0.0541, -0.0585, -0.0282,  0.0077,  0.0416]),)