In [1]:
import os

# Runtime switches. They are set ahead of the `import torch` further down so
# that the libraries can pick them up when they initialise.

# Synchronous CUDA kernel launches, so errors surface at the offending call.
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
# cuBLAS workspace configuration (see the PyTorch reproducibility notes).
# The key was previously misspelled as "CUBLAS_WORKSPACE_CONforcesIG", which
# meant the setting never reached cuBLAS.
os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
# The key was previously misspelled as "PYTORCH_JIT_DISABLE_NVforcesUSER".
# NOTE(review): confirm that the installed torch version still honours this
# variable; nvFuser handling has changed across releases.
os.environ["PYTORCH_JIT_DISABLE_NVFUSER"] = "1"
# Disable torch.compile so everything runs eagerly.
os.environ["TORCH_COMPILE_DISABLE"] = "1"

###########################################################

import torch
import numpy as np
import dxtb
from dxtb.config import ConfigCache
from tqdm import tqdm


# --- Seeding: NumPy, the torch default generator and every CUDA generator ---
np.random.seed(0)
torch.manual_seed(0)
torch.cuda.manual_seed_all(0)

# --- Autograd debugging: report the forward op behind a failing backward ---
torch.autograd.set_detect_anomaly(True)

# --- Determinism switches: all explicitly turned OFF for this run ---
torch.use_deterministic_algorithms(False)
torch.set_deterministic_debug_mode(0)
torch.backends.cudnn.deterministic = False
# Let cuDNN benchmark and pick its algorithms.
torch.backends.cudnn.benchmark = True

# Release cached allocator blocks before the calculation starts.
torch.cuda.empty_cache()

###########################################################

# Record the library versions next to the results for reproducibility.
for label, value in (
    ("Torch version:", torch.__version__),
    ("CUDA version:", torch.version.cuda),
    ("CUDNN version:", torch.backends.cudnn.version()),
):
    print(label, value)

###########################################################

# Serialized batch that reproduces the issue (path relative to the notebook).
problematic_batch_path = "../problematic_batches/problematic_batch_for_full.pt"

# dtype/device keywords shared by the tensors and calculators created below.
dd = dict(dtype=torch.float32, device=torch.device("cuda:0"))

# Options forwarded to dxtb.Calculator.
opts = dict(
    scf_mode="full",
    batch_mode=2,
    int_driver="libcint",
    # exclude=["disp"],  # excluding D3 fixes the issue
)

# Load the problematic batch.
# NOTE(security): weights_only=False unpickles arbitrary Python objects, so
# only point this at files from a trusted source.
problematic_batch = torch.load(
    problematic_batch_path, weights_only=False
)
numbers = problematic_batch["numbers"].to(dd["device"])
# The derivatives taken further down differentiate w.r.t. `positions`. Make
# sure autograd tracks it even if the stored tensor was saved without
# requires_grad; this call is a no-op when the flag is already set.
positions = problematic_batch["positions"].to(**dd).requires_grad_(True)

# Leading dimension of `numbers` is used as the batch size.
batch_size = numbers.shape[0]
# Zero-filled charge tensor with one entry per batch row.
charges = torch.full((batch_size,), 0, **dd)

# Variant of the options that additionally excludes the "disp" component
# (the D3 term, per the note next to `opts`).
opts_wout_d3 = {**opts, "exclude": ["disp"]}

# Calculator using the unmodified options; its cache config is replaced with
# one created as ConfigCache(enabled=False, density=True, fock=True).
calc = dxtb.Calculator(numbers, dxtb.GFN1_XTB, opts=opts, **dd)
calc.opts.cache = ConfigCache(enabled=False, density=True, fock=True)

# Identical setup, but built from the options that exclude "disp".
calc_wout_d3 = dxtb.Calculator(numbers, dxtb.GFN1_XTB, opts=opts_wout_d3, **dd)
calc_wout_d3.opts.cache = ConfigCache(enabled=False, density=True, fock=True)

# Energies from both calculators for the same positions and charges.
e = calc.get_energy(positions, charges)
e_wout_d3 = calc_wout_d3.get_energy(positions, charges)

# Gradient of the summed energies with respect to `positions`.
# NOTE(review): despite the name `f` (and the "forces" labels printed later),
# this is the raw autograd gradient, not its negative.
# retain_graph=True keeps the graph alive after each grad call.
f = torch.autograd.grad(e.sum(), positions, retain_graph=True)[0]
f_wout_d3 = torch.autograd.grad(e_wout_d3.sum(), positions, retain_graph=True)[0]

# Densities returned by both calculators.
P = calc.get_density(positions, charges)
P_wout_d3 = calc_wout_d3.get_density(positions, charges)

# Gradient of the summed density entries with respect to `positions`.
P_grad = torch.autograd.grad(P.sum(), positions, retain_graph=True)[0]
P_wout_d3_grad = torch.autograd.grad(P_wout_d3.sum(), positions, retain_graph=True)[0]


Torch version: 2.5.1
CUDA version: 12.4
CUDNN version: 90100


In [6]:
# Comparisons


def _print_diff_stats(tag, diff, reference, ratio_label):
    """Print max/mean absolute difference and the norm ratio for one quantity.

    Args:
        tag: Short quantity name inserted into the "Max abs ... diff" and
            "Mean abs ... diff" labels.
        diff: Tensor holding the difference between the two runs.
        reference: Tensor whose norm is used to normalise ``torch.norm(diff)``.
        ratio_label: Label printed in front of the norm ratio.
    """
    abs_diff = torch.abs(diff)
    print(f"Max abs {tag} diff: {torch.max(abs_diff):.6e}")
    print(f"Mean abs {tag} diff: {torch.mean(abs_diff):.6e}")
    print(f"{ratio_label}: {torch.norm(diff) / torch.norm(reference):.6e}")


## Energy diffs
print(f"Avg E {e.mean():.3f}, Avg E w/out {e_wout_d3.mean():.3f}")
print(f"First energy: {e[0].item():.6f}, First energy w/out: {e_wout_d3[0].item():.6f}")
energy_diff = e - e_wout_d3
_print_diff_stats("E", energy_diff, e, "Rel E diff")


## Force diffs
print("\nForces:")
force_diff = f - f_wout_d3
print(f"First force entry: {f[0,0]}, First force w/out: {f_wout_d3[0,0]}")
_print_diff_stats("forces", force_diff, f, "forces diff norm ratio")

## Density diffs
print("\nDensity:")
density_diff = P - P_wout_d3
print(f"First density entry: {P[0,0]}, First density w/out: {P_wout_d3[0,0]}")
_print_diff_stats("P", density_diff, P, "P diff norm ratio")

## Density grad diffs
print("\nDensity grads:")
density_grad_diff = P_grad - P_wout_d3_grad
print(f"First density grad entry: {P_grad[0,0]}, First density grad w/out: {P_wout_d3_grad[0,0]}")
_print_diff_stats("dP", density_grad_diff, P_grad, "dP diff norm ratio")

# Numerical check: flag non-finite entries in either gradient tensor.
print("\nNumerical check:")
for tag, grad in (("w/ D3", f), ("w/o D3", f_wout_d3)):
    print(f"NaN in forces {tag}: {torch.isnan(grad).any().item()}")
    print(f"Inf in forces {tag}: {torch.isinf(grad).any().item()}")

Avg E -16.231, Avg E w/out -16.229
First energy: -16.182373, First energy w/out: -16.180489
Max abs E diff: 1.890182e-03
Mean abs E diff: 1.834124e-03
Rel E diff: 1.130369e-04

Forces:
First force entry: tensor([-0.0082, -0.0114,  0.0033], device='cuda:0'), First force w/out: tensor([-0.0082, -0.0114,  0.0033], device='cuda:0')
Max abs forces diff: 4.419982e-04
Mean abs forces diff: 3.866575e-05
forces diff norm ratio: 6.527558e-03

Density:
First density entry: tensor([ 1.6938e+00,  5.2472e-01,  2.1520e-01, -1.2234e-01, -1.8841e-02,
         7.1359e-03, -1.0848e-02, -1.7478e-02, -3.5624e-03, -6.2113e-03,
        -2.9211e-02,  1.2141e-02, -1.9394e-02, -4.1280e-02, -2.9110e-02,
        -3.7572e-03, -3.8152e-02,  1.8603e-01,  7.6812e-02, -4.8398e-02,
        -1.6779e-03, -6.6227e-03,  3.5152e-02,  5.4207e-03], device='cuda:0',
       grad_fn=<SelectBackward0>), First density w/out: tensor([ 1.6938e+00,  5.2472e-01,  2.1520e-01, -1.2234e-01, -1.8841e-02,
         7.1359e-03, -1.0848e-02, 