In [32]:
import torch

In [33]:
from gadff.horm.ff_lmdb import LmdbDataset
from gadff.path_config import fix_horm_dataset_path

# Turn the bare dataset file name into a usable path via the project helper
# (presumably a lookup of the local dataset location — verify in
# gadff.path_config), then open the LMDB-backed dataset.
input_lmdb_path = fix_horm_dataset_path("ts1x-val.lmdb")
dataset = LmdbDataset(input_lmdb_path)
print(f"Loaded dataset with {len(dataset)} samples from {input_lmdb_path}")

# Peek at the first sample to see how the Hessian and positions are stored.
# In the recorded output the Hessian is flat ([2304] = (16 * 3) ** 2 entries)
# and the positions are [16, 3], so the Hessian has to be reshaped to a square
# matrix before any linear algebra is applied to it.
sample = dataset[0]
print(sample.hessian.shape)
print(sample.pos.shape)


Loaded dataset with 50844 samples from /home/andreasburger/.cache/kagglehub/datasets/yunhonghan/hessian-dataset-for-optimizing-reactive-mliphorm/versions/5/ts1x-val.lmdb
torch.Size([2304])
torch.Size([16, 3])


In [34]:
def get_hessian_eigenvalues_and_vectors(hessian, N, D=3, hermitian=True):
    """Return the two lowest eigenpairs of a Hessian matrix.

    Args:
        hessian: Tensor with (N*D)**2 elements, either flat or already
            shaped [N*D, N*D].
        N: Number of atoms.
        D: Number of spatial dimensions per atom (default 3).
        hermitian: If True, use ``torch.linalg.eigh``, which assumes a
            symmetric/Hermitian matrix, reads only its lower triangle by
            default, and returns real eigenvalues in ascending order.
            If False, use ``torch.linalg.eig`` (general matrix); its results
            are complex and unordered, so they are sorted here by the real
            part of the eigenvalues.

    Returns:
        Tuple ``(eigval1, eigval2, eigvec1, eigvec2)`` on the CPU:
        ``eigval1``/``eigval2`` are tensors of shape [1] holding the smallest
        and second-smallest eigenvalue, and ``eigvec1``/``eigvec2`` are the
        matching eigenvectors as flat tensors of shape [N*D]. With
        ``hermitian=False`` all four tensors keep the complex dtype returned
        by ``torch.linalg.eig``.

    Raises:
        ValueError: If N*D < 2, since two eigenpairs cannot be extracted.
        AssertionError: If the eigenvalues are not in ascending order
            (for example when they contain NaN).
    """
    n_dof = N * D  # number of degrees of freedom = num_atoms * num_dim
    if n_dof < 2:
        raise ValueError(
            f"Need at least 2 degrees of freedom to return two eigenpairs, got N*D={n_dof}"
        )

    # reshape (rather than view) also accepts non-contiguous input
    hessian = hessian.reshape(n_dof, n_dof)  # [ND, ND]

    if hermitian:
        # [ND], [ND, ND]; already ascending
        eigenvalues, eigenvectors = torch.linalg.eigh(hessian)
        ordering_key = eigenvalues
    else:
        eigenvalues, eigenvectors = torch.linalg.eig(hessian)
        # eig returns complex tensors, which cannot be sorted or compared
        # with <= directly, so order the spectrum by its real part.
        sort_indices = torch.argsort(eigenvalues.real)
        eigenvalues = eigenvalues[sort_indices]
        # convention is that the eigenvectors are column vectors
        eigenvectors = eigenvectors[:, sort_indices]
        ordering_key = eigenvalues.real

    # verify that the eigenvalues are in ascending order (single vectorized check)
    assert bool(torch.all(ordering_key[:-1] <= ordering_key[1:])), (
        "eigenvalues are not in ascending order"
    )

    smallest_eigenvals = eigenvalues[:2].cpu()  # [2]
    smallest_eigenvecs = eigenvectors[:, :2].cpu()  # [ND, 2], one eigenvector per column

    # Put each eigenvector on its own row: [2, ND]. The vectors stay flat so
    # they can be multiplied directly with the [ND, ND] Hessian.
    eigvecs_flat = smallest_eigenvecs.T.reshape(2, n_dof)

    eigval1 = smallest_eigenvals[0:1]  # keep as [1] tensor
    eigval2 = smallest_eigenvals[1:2]  # keep as [1] tensor
    eigvec1 = eigvecs_flat[0]  # [ND]
    eigvec2 = eigvecs_flat[1]  # [ND]

    return eigval1, eigval2, eigvec1, eigvec2

In [35]:
# Fetch the first sample of the dataset again under its own name
original_sample = dataset[0]

# Number of atoms N = leading dimension of the position tensor
n_atoms = original_sample.pos.size(0)

# Square view of the stored DFT Hessian, shape [3*N, 3*N]
dft_hessian = original_sample.hessian.view(3 * n_atoms, 3 * n_atoms)

# The Hessian is deliberately left on its current device: per the original
# note, the memory movement overhead of transferring it is not worth it.

# Two smallest eigenvalues (each a [1] tensor) and their flat [3*N] eigenvectors
eigval1, eigval2, eigvec1, eigvec2 = get_hessian_eigenvalues_and_vectors(
    dft_hessian, n_atoms
)

In [36]:

# Sanity check: each returned pair (λ, v) should satisfy the eigenvalue
# equation H v = λ v, and the two eigenvectors should be mutually orthogonal.
# For a consistent eigendecomposition both residuals and the overlap are
# expected to be at floating-point round-off level.
# NOTE(review): the recorded output of this cell shows residuals of order
# 1e+01 and an overlap of 2.58e-01, which is far from round-off — re-run on a
# fresh kernel and investigate before trusting these eigenpairs.

# First eigenpair: H v on one side ...
hessian_times_eigenvec = dft_hessian @ eigvec1  # [3*N]

# ... and λ v on the other
eigenval_times_eigenvec = eigval1 * eigvec1  # [3*N]

# Largest absolute component of the residual H v - λ v
eigenvec_error = (hessian_times_eigenvec - eigenval_times_eigenvec).abs().max()
print(f"Eigenvector verification error (max |H*v - λ*v|): {eigenvec_error:.2e}")

# Second eigenpair, same procedure
print("\nVerifying second eigenvector...")

hessian_times_eigenvec2 = dft_hessian @ eigvec2
eigenval_times_eigenvec2 = eigval2 * eigvec2
eigenvec_error2 = (hessian_times_eigenvec2 - eigenval_times_eigenvec2).abs().max()
print(f"Eigenvector verification error (max |H*v - λ*v|): {eigenvec_error2:.2e}")

# Overlap of the two eigenvectors; zero means orthogonal
orthogonality_error = eigvec1.dot(eigvec2).abs()
print(f"\nOrthogonality error (|v1·v2|): {orthogonality_error:.2e}")

Eigenvector verification error (max |H*v - λ*v|): 1.50e+01

Verifying second eigenvector...
Eigenvector verification error (max |H*v - λ*v|): 1.34e+01

Orthogonality error (|v1·v2|): 2.58e-01
