In [1]:
import torch
import numpy as np
from tqdm import tqdm
from analytical_expressions import local_energy
from torch.autograd.functional import jacobian
from torch.func import jacrev
import matplotlib.pyplot as plt
from torch.func import vmap

In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
def psi(X):
    x = X[:3]
    y = X[3:6]
    alpha_1, alpha_2, alpha_3, alpha_4 = X[6:]
    r1 = torch.norm(x)
    r2 = torch.norm(y)
    r12 = torch.norm(x - y)

    term1 = torch.exp(-2 * (r1 + r2))
    term2 = 1 + 0.5 * r12 * torch.exp(-alpha_1 * r12)
    term3 = 1 + alpha_2 * (r1 + r2) * r12 + alpha_3 * (r1 - r2)**2 - alpha_4 * r12

    return term1 * term2 * term3

In [5]:
psi_vec = vmap(psi)

In [6]:
def metropolis(N: int, n_runs: int, alphas: torch.tensor):  
    """
    Vectorized metropolis loop
    Over N steps, for n_runs. 
    Alphas passes in must be of same dim as n_runs
    """  
    assert alphas.shape[0] == n_runs        
    L = 1
    r1 = (torch.rand(n_runs, 3, requires_grad=True) * 2 * L - L)
    r2 = (torch.rand(n_runs, 3, requires_grad=True) * 2 * L - L)
    max_steps = 1000
    sampled_Xs = []
    rejection_ratio = 0

    for i in tqdm(range(N)):
        chose = torch.rand(n_runs).reshape(n_runs, 1)
        dummy = torch.rand(n_runs)

        perturbed_r1 = r1 + 0.5 * (torch.rand(n_runs, 3) * 2 * L - L)
        perturbed_r2 = r2 + 0.5 * (torch.rand(n_runs, 3) * 2 * L - L)

        r1_trial = torch.where(chose < 0.5, perturbed_r1, r1)
        r2_trial = torch.where(chose >= 0.5, perturbed_r2, r2)
        psi_val = psi_vec(torch.cat((r1, r2, alphas), axis=1))
        psi_trial_val = psi_vec(torch.cat((r1_trial, r2_trial, alphas), axis=1))      
        psi_ratio = psi_trial_val / psi_val

        density_comp = psi_trial_val >= psi_val
        dummy_comp = dummy < psi_ratio

        condition = density_comp + dummy_comp

        rejection_ratio += torch.where(condition, 1./N, 0.0)

        condition = condition.reshape(condition.shape[0], 1)

        # Careful with overwriting
        r1 = torch.where(condition, r1_trial, r1)
        r2 = torch.where(condition, r2_trial, r2)
                
        if i > max_steps:
            sampled_Xs.append(torch.cat((r1, r2, alphas), axis=1))

    return torch.stack(sampled_Xs)

In [7]:
local_e_vec = vmap(local_energy)
local_e_vec_vec = vmap(local_e_vec)

def get_local_energies(X):
    reshaped_X = X.reshape(
        X.shape[1], X.shape[0], X.shape[2])
    return local_e_vec_vec(reshaped_X)

def get_mean_energies(E):
    return torch.mean(torch.mean(E, dim=1))

In [8]:
def dE_dalpha(input):
    return jacrev(local_energy)(input)

dE_dalpha_vec = vmap(dE_dalpha)
dE_dalpha_vec_vec = vmap(dE_dalpha_vec)

def get_dE_dX(X):
    reshaped_X = X.reshape(
        X.shape[1], X.shape[0], X.shape[2])
    return dE_dalpha_vec_vec(X)

In [10]:
alpha_1 = torch.tensor(1.013, dtype=torch.float64, requires_grad=True) # 1.013
alpha_2 = torch.tensor(0.2119, dtype=torch.float64, requires_grad=True)
alpha_3 = torch.tensor(0.1406, dtype=torch.float64, requires_grad=True)
alpha_4 = torch.tensor(0.003, dtype=torch.float64, requires_grad=True)

In [11]:
device = torch.device("cuda")
cpu = torch.device("cpu")

## Start of simulation

In [12]:
n_steps = 5
alphas = torch.tensor([alpha_1, alpha_2, alpha_3, alpha_4]).unsqueeze(0).repeat(n_steps, 1)
sampled_Xs = metropolis(10000, n_steps, alphas=alphas)

100%|██████████| 10000/10000 [00:16<00:00, 612.62it/s]


In [66]:
sampled_Xs_gpu = sampled_Xs.clone().to(device)

In [62]:
torch.cuda.empty_cache()

In [13]:
E = get_local_energies(sampled_Xs)
mean_E = get_mean_energies(E.to(cpu))
print(f"Mean energy is {torch.mean(torch.mean(E, axis=1))}")

Mean energy is -2.920655483312287


In [16]:
## Getting gradients
X_ = sampled_Xs[:,0]

In [50]:
from gradient_expressions import get_psi_alpha

In [97]:
def get_gradients_from_expression(X_, E_):
    psi_alpha = vmap(get_psi_alpha)(X_)

    part_1 = psi_alpha - torch.mean(psi_alpha, axis=0)
    part_2 = E_ - torch.mean(E_)
    return torch.mean(part_1.T * part_2, axis=1)

dE_dalpha = vmap(get_gradients_from_expression)

In [95]:
gradients = get_dE_dX(sampled_Xs)

In [80]:
gradients.shape

torch.Size([8999, 5, 10])

In [87]:
reshaped_X = sampled_Xs.reshape(
        sampled_Xs.shape[1], sampled_Xs.shape[0], sampled_Xs.shape[2])

In [82]:
X_.shape

torch.Size([8999, 10])

In [92]:
torch.mean(vmap(get_gradients_from_expression)(reshaped_X, E), axis=0)

tensor([ 0.0210,  0.9693, -0.3699,  0.0443], dtype=torch.float64,
       grad_fn=<MeanBackward1>)

In [96]:
torch.mean(torch.mean(gradients, axis=0), axis=0)[-4:]

tensor([-0.0090,  0.0440,  0.0194, -0.0349], dtype=torch.float64,
       grad_fn=<SliceBackward0>)

Energy value should be −2.901188

The actual value is −2.9037243770

In [70]:
true_value = -2.9037243770

In [100]:
import math

In [99]:
E_true = -2.9037243770

In [112]:
# Naive approach - define loss as true energy - found energy

epochs = 2000
alphas = [alpha_1, alpha_2, alpha_3, alpha_4]
losses = []
n_walkers = 10
met_steps = 50000

for i in range(epochs):

    # Step 5: Zero gradients (as usual in PyTorch)
    alphas = [alpha_1, alpha_2, alpha_3, alpha_4]
    alphas_metropolis = torch.tensor(alphas).unsqueeze(0).repeat(n_walkers, 1)
    sampled_Xs = metropolis(met_steps, n_walkers, alphas=alphas_metropolis)


    E = get_local_energies(sampled_Xs.to(device))
    mean_E = get_mean_energies(E.to(cpu))
    loss = torch.abs(E_true - mean_E)

    print(f"Mean energy is {mean_E}")
    print(f"Loss is {loss}")
    losses.append(loss.item())

    reshaped_X = sampled_Xs.reshape(
        sampled_Xs.shape[1], sampled_Xs.shape[0], sampled_Xs.shape[2])
    gradients = dE_dalpha(reshaped_X.to(device), E).to(cpu)

    # Step 2: Create Adam optimizer
    optimizer = torch.optim.Adam(alphas, lr=0.00001)

    # Now, assume you already have gradients computed externally:
    # Example: for step t, these are your gradients (replace with actual values)
    gradients = torch.mean(gradients, axis=0)
    external_grads = gradients.detach()

    # Step 3: Assign gradients manually
    for p, g in zip(alphas, external_grads):
        p.grad = g  # assign your externally computed gradient

    # Step 4: Optimizer step
    optimizer.step()
    optimizer.zero_grad()

    torch.cuda.empty_cache()
    del sampled_Xs




100%|██████████| 50000/50000 [01:01<00:00, 810.14it/s]


Mean energy is -2.945884902431609
Loss is 0.042160525431608864


100%|██████████| 50000/50000 [00:57<00:00, 871.86it/s]


Mean energy is -2.9713208807113234
Loss is 0.06759650371132331


100%|██████████| 50000/50000 [00:56<00:00, 880.98it/s]


Mean energy is -2.954641916486964
Loss is 0.05091753948696409


100%|██████████| 50000/50000 [00:55<00:00, 894.00it/s]


Mean energy is -2.9170323762050763
Loss is 0.013307999205076193


100%|██████████| 50000/50000 [00:57<00:00, 877.11it/s]


Mean energy is -2.943909031972007
Loss is 0.04018465497200685


100%|██████████| 50000/50000 [00:56<00:00, 889.44it/s]


Mean energy is -2.957242682083927
Loss is 0.053518305083926965


100%|██████████| 50000/50000 [00:54<00:00, 922.65it/s]


Mean energy is -2.946447231669823
Loss is 0.04272285466982284


100%|██████████| 50000/50000 [00:53<00:00, 938.79it/s]


Mean energy is -2.9778694319573598
Loss is 0.07414505495735968


100%|██████████| 50000/50000 [00:53<00:00, 939.04it/s]


Mean energy is -2.9397839585137056
Loss is 0.036059581513705474


100%|██████████| 50000/50000 [00:54<00:00, 910.32it/s]


Mean energy is -2.9680158263418592
Loss is 0.06429144934185915


100%|██████████| 50000/50000 [00:53<00:00, 928.54it/s]


Mean energy is -2.9727091729602626
Loss is 0.06898479596026252


  5%|▌         | 2656/50000 [00:02<00:52, 902.82it/s]


KeyboardInterrupt: 

In [137]:
alpha_1 = torch.tensor(2.013, dtype=torch.float64, requires_grad=True) # 1.013
alpha_2 = torch.tensor(0.6419, dtype=torch.float64, requires_grad=True) # 0.2119
alpha_3 = torch.tensor(2.1406, dtype=torch.float64, requires_grad=True) # 0.1406
alpha_4 = torch.tensor(3.003, dtype=torch.float64, requires_grad=True) # 0.003

In [113]:
alpha_1

tensor(2.0130, dtype=torch.float64, requires_grad=True)

In [106]:
external_grads

tensor([-0.0080,  0.0429,  0.0155, -0.0305], dtype=torch.float64)

## Gradient values

In [34]:
def dE_dalpha(input):
    return jacrev(local_energy)(input)

t = dE_dalpha_vec(torch.stack(inputs_arr[0]))

In [72]:
dE_dalpha_mean = torch.mean(t, axis=0)

In [36]:
psi_vmap = vmap(psi)

In [38]:
psi_values = psi_vmap(torch.stack(inputs_arr[0]))

In [61]:
mean_energy = sum(energies[0])/(len(energies[0]))

In [63]:
El_Etheta = energies[0] - mean_energy

In [65]:
mean_psi = torch.mean(psi_values)

In [73]:
dE_dalpha_mean.shape

torch.Size([10])

In [86]:
t[0].shape

torch.Size([10])

In [89]:
psi_values.shape

torch.Size([9500])

In [92]:
psi_dalph = torch.stack([psi_values[i] * t[i] for i in range(len(t))])

In [97]:
psi_dalph.shape

torch.Size([9500, 10])

In [98]:
dE_dalpha_mean.shape

torch.Size([10])

In [95]:
mean_psi

tensor(0.0207, dtype=torch.float64, grad_fn=<MeanBackward0>)

In [77]:
t.shape

torch.Size([9500, 10])

In [111]:
a = psi_values.unsqueeze(1).repeat(1, 10) * t

In [110]:
b = (mean_psi * dE_dalpha_mean).unsqueeze(0).repeat(9500, 1)

In [119]:
c = (energies[0] - mean_energy).unsqueeze(1).repeat(1, 10)

In [115]:
mean_energy.shape

torch.Size([])

In [120]:
gradients = (a - b) * (c)

In [124]:
torch.mean(gradients, axis=0)

tensor([ 0.0652,  0.4838,  0.4329,  0.0146,  0.1017,  0.0883,  0.0167, -0.0222,
        -0.0307,  0.0271], dtype=torch.float64, grad_fn=<MeanBackward1>)

In [52]:
energies[0][1]

tensor(-1.6312, dtype=torch.float64, grad_fn=<SelectBackward0>)

In [53]:
E_fixed = [energies[0][i] / psi_values[i] for i in range(len(inputs_arr[0]))]

In [55]:
torch.mean(torch.stack(E_fixed))

tensor(-2546.7772, dtype=torch.float64, grad_fn=<MeanBackward0>)