# Initialization:

In [1]:
import torch
import dqc
import dqc.xc
import dqc.utils

from DQCAdapter import DQCAdapter

In [2]:
class MyLDAX(dqc.xc.CustomXC):
    def __init__(self, a_par, p_par):
        super().__init__()
        self.a_par = a_par
        self.p_par = p_par
        self.number_of_parameters = 2

    @property
    def family(self):
        # 1 for LDA, 2 for GGA, 4 for MGGA
        return 1

    def get_edensityxc(self, densinfo):
        # densinfo has up and down components
        if isinstance(densinfo, dqc.utils.SpinParam):
            # spin-scaling of the exchange energy
            return 0.5 * (self.get_edensityxc(densinfo.u * 2) + self.get_edensityxc(densinfo.d * 2))
        else:
            rho = densinfo.value.abs() + 1e-15  # safeguarding from nan
            return self.a_par * rho ** self.p_par
        
    def get_edensityxc_derivative(self, densinfo, number_of_parameter):
        # densinfo has up and down components
        if isinstance(densinfo, dqc.utils.SpinParam):
            # spin-scaling of the exchange energy
            return 0.5 * (self.get_edensityxc_derivative(densinfo.u * 2, number_of_parameter) 
                          + self.get_edensityxc_derivative(densinfo.d * 2, number_of_parameter))
        else:
            rho = densinfo.value.abs() + 1e-15  # safeguarding from nan
            if number_of_parameter == 0: # parameter a
                return rho ** self.p_par
            elif number_of_parameter == 1: # parameter p
                return self.a_par * torch.log(rho) * rho ** self.p_par

In [3]:
a_par = torch.nn.Parameter(torch.tensor(1.0, dtype=torch.double))
p_par = torch.nn.Parameter(torch.tensor(2.0, dtype=torch.double))
myxc = MyLDAX(a_par, p_par)

In [4]:
mol = dqc.Mol(moldesc="N -1 0 0; N 1 0 0", basis="3-21G")
qc = dqc.KS(mol, xc=myxc).run()
ene = qc.energy()
print(ene)

tensor(-54.0932, dtype=torch.float64, grad_fn=<AddBackward0>)




In [5]:
adapter = DQCAdapter(qc)

In [6]:
fockian = adapter.get_fockian()
print(fockian.size())

torch.Size([18, 18])


In [7]:
coefficients = adapter.get_orbital_coefficients()
coefficients.size()

torch.Size([18, 7])

In [8]:
orbital_energies = adapter.get_orbital_energies()
orbital_energies.size()

torch.Size([7])

In [9]:
occupancy = adapter.get_orbital_occupancy()
occupancy.size()

torch.Size([7])

In [10]:
number_of_occupied_orbitals = adapter.get_number_of_occupied_orbitals()

# Calculating adjoint derivatives:

## Derivatives from energy:

### Calculating $\frac{\partial E[\rho](\vec{\theta})}{\partial \textbf{C}}$

#### The first way to calculate derivative from energy with respect to coefficients:
Every type of orbitals can be used

$$ \frac{\partial E[\rho](\vec{\theta})}{\partial C_{bj}} = 2f_b \sum_i C_{bi}F_{ij}$$
so
$$ \frac{\partial E[\rho](\vec{\theta})}{\partial \textbf{C}} = 2\textbf{f}\textbf{C}\textbf{F}$$
$\textbf{f}$ here is matrix: $f_{ab}=\delta_{ab}f_a$

In [11]:
dE_wrt_dC_first_way = 2 * torch.einsum("b,ib,ij->bj", occupancy, coefficients, fockian)
dE_wrt_dC_first_way.size()

torch.Size([7, 18])

#### The second way to calculate derivative from energy with respect to coefficients
Only canonical (i.e. eigenfunctions of fockian) orbitals can be used

from the other hand, 
$$ \frac{\partial E[\rho](\vec{\theta})}{\partial C_{bj}} = 2f_b \sum_i C_{bi}F_{ij} = 2f_b \epsilon_b C_{bj}$$
so
$$ \frac{\partial E[\rho](\vec{\theta})}{\partial \textbf{C}} = 2\textbf{f}\epsilon\textbf{C}$$
$\epsilon$ here is matrix: $\epsilon_{ab}=\delta_{ab}\epsilon_a$

In [12]:
dE_wrt_dC_second_way = 2 * torch.einsum("b,b,jb->bj", occupancy, orbital_energies, coefficients)
dE_wrt_dC_second_way.size()

torch.Size([7, 18])

Check that both ways lead to the same results:

In [13]:
print(torch.linalg.matrix_norm(dE_wrt_dC_first_way - dE_wrt_dC_second_way))

tensor(2.7748e-13, dtype=torch.float64, grad_fn=<CopyBackwards>)


### Calculating $\frac{\partial E[\rho](\vec{\theta})}{\partial \vec{\epsilon}}$

$$\frac{\partial E[\rho](\vec{\theta})}{\partial \epsilon_{b}} = 0$$ for all $\epsilon_b$. So:

In [14]:
dE_wrt_depsilon = torch.zeros(number_of_occupied_orbitals)
dE_wrt_depsilon.size()

torch.Size([7])

## Derivatives from normalization equations:

### Calculating $\frac{\partial \vec{r}(\textbf{C})}{\partial \textbf{C}}$

$$\frac{\partial r_{a}(\textbf{C})}{\partial C_{ck}} = 2\delta_{ac}C_{ck}$$


In [15]:
occupied_orbitals_kronecker = torch.eye(number_of_occupied_orbitals)

In [16]:
dnorm_wrt_dC = 2 * torch.einsum("ac,kc->kac", occupied_orbitals_kronecker, coefficients)
dnorm_wrt_dC.size()

torch.Size([18, 7, 7])

### Calculating $\frac{\partial \vec{r}(\textbf{C})}{\partial \vec{\epsilon}}$

$$\frac{\partial r_{a}(\textbf{C})}{\partial \epsilon_b} = 0$$


In [17]:
dnorm_wrt_depsilon = torch.zeros((number_of_occupied_orbitals, number_of_occupied_orbitals))
dnorm_wrt_depsilon.size()

torch.Size([7, 7])

## Derivatives from Roothan equations:

### Calculating $\frac{\partial \textbf{r}(\textbf{C};\;\vec{\theta})}{\partial \textbf{C}}$

$$G_{Cou, ijkl} = \iint b_i(\vec{r}) \frac{b_k(\vec{r'})b_l(\vec{r'})}{|\vec{r}-\vec{r'}|} b_j(\vec{r})d\vec{r'}d\vec{r}$$
$$G_{XC, ijkl} = 
\int b_i(\vec{r}) b_k(\vec{r}) \frac{\partial V_{XC}[\rho](\vec{r};\;\vec{\theta})}{\partial \rho(\vec{r})}b_l(\vec{r})b_j(\vec{r})d\vec{r}$$
$$\frac{\partial r_{ia}(\textbf{C};\;\vec{\theta})}{\partial C_{ck}} = 
(F_{ik}[\rho](\vec{\theta}) - \epsilon_c\delta_{ik})\delta_{ac} +
2f_c\sum_j \sum_{l}C_{cl}\left(G_{Cou,ijkl} + G_{XC,ijkl}\right)C_{aj}
$$

First term:

In [18]:
number_of_all_orbitals = adapter.get_number_of_all_orbitals()
all_orbitals_kronecker = torch.eye(number_of_all_orbitals)
dRoothan_wrt_dC_first_first_term = torch.einsum("ik,ac->iakc", fockian, occupied_orbitals_kronecker)
dRoothan_wrt_dC_first_second_term = torch.einsum("c,ik,ac->iakc", orbital_energies, all_orbitals_kronecker, occupied_orbitals_kronecker)
dRoothan_wrt_dC_first_term = dRoothan_wrt_dC_first_first_term + dRoothan_wrt_dC_first_second_term
dRoothan_wrt_dC_first_term.size()

torch.Size([18, 7, 18, 7])

Second term:
(NB: fourDtensor loses symmetry of XC-tensor):

In [19]:
fourDtensor = adapter.get_four_center_elrep_tensor() + adapter.get_four_center_xc_tensor()
dRoothan_wrt_dC_second_term = 2 * torch.einsum("c,ijkl,lc,ja->iakc", occupancy, fourDtensor, coefficients, coefficients)
dRoothan_wrt_dC_second_term.size()

torch.Size([18, 7, 18, 7])

The sum:

In [20]:
dRoothan_wrt_dC = dRoothan_wrt_dC_first_term + dRoothan_wrt_dC_second_term
dRoothan_wrt_dC.size()

torch.Size([18, 7, 18, 7])

### Calculating $\frac{\partial \textbf{r}(\textbf{C};\;\vec{\theta})}{\partial \vec{\epsilon}}$


$$\frac{\partial r_{ia}(\textbf{C};\;\vec{\theta})}{\partial \epsilon_{c}} = -\delta_{ac}C_{ai}$$

In [21]:
dRoothan_wrt_depsilon = -1 * torch.einsum("ac,kc->kac", occupied_orbitals_kronecker, coefficients)
dRoothan_wrt_depsilon.size()

torch.Size([18, 7, 7])

# Calculate adjoint vector

## Concatenate tensors:

### $\frac{\partial E}{\partial \vec{X}}$

$$\left( \frac{\partial E}{\partial \vec{X}} \right)_{N_{ao}j+c} =  \begin{cases}
   \frac{\partial E}{\partial С_{cj}} &\text{if $0 \le j < N_{orb}$} \\
   \frac{\partial E}{\partial \epsilon_{c}} &\text{if $j = N_{orb}$}
 \end{cases}$$

In [22]:
print(dE_wrt_depsilon.unsqueeze(-1).size())
print(dE_wrt_dC_second_way.size())
dE_wrt_dX = torch.cat((dE_wrt_dC_second_way, dE_wrt_depsilon.unsqueeze(-1)), 1)
print(dE_wrt_dX.size())
dE_wrt_dX = dE_wrt_dX.t().reshape(-1,)
print(dE_wrt_dX.size())

torch.Size([7, 1])
torch.Size([7, 18])
torch.Size([7, 19])
torch.Size([133])


Check it:

In [23]:
for j in range(number_of_all_orbitals + 1):
    for c in range(number_of_occupied_orbitals):
        index = number_of_occupied_orbitals * j + c
        if (j < number_of_all_orbitals):
            assert(dE_wrt_dX[index] == dE_wrt_dC_second_way[c][j])
        else:
            assert(dE_wrt_dX[index] == dE_wrt_depsilon[c])

### $\frac{\partial \vec{Y}}{\partial \vec{X}}$

$$\left( \frac{\partial \vec{Y}}{\partial \vec{X}} \right)_{N_{ao}i+a,N_{ao}j+c} =  \begin{cases}
   \frac{\partial r_{ia}}{\partial С_{cj}} &\text{if $0 \le i < N_{orb}$ and $0 \le j < N_{orb}$} \\
   \frac{\partial r_{ia}}{\partial \epsilon_{c}} &\text{if $0 \le i < N_{orb}$ and $j = N_{orb}$} \\
   \frac{\partial r_{a}}{\partial С_{cj}} &\text{if $i = N_{orb}$ and $0 \le j < N_{orb}$} \\
   \frac{\partial r_{a}}{\partial \epsilon_{c}} &\text{if $i = N_{orb}$ and $j = N_{orb}$}
 \end{cases}$$

In [24]:
nao_norb_product = number_of_all_orbitals * number_of_occupied_orbitals
upper_left = dRoothan_wrt_dC.reshape((nao_norb_product,nao_norb_product))
print(dRoothan_wrt_dC.size(), "=>", upper_left.size())
upper_right = dRoothan_wrt_depsilon.reshape((nao_norb_product, number_of_occupied_orbitals))
print(dRoothan_wrt_depsilon.size(), "=>", upper_right.size())
upper = torch.cat((upper_left, upper_right), 1)
print(upper_left.size(), "+", upper_right.size(), "=", upper.size())

torch.Size([18, 7, 18, 7]) => torch.Size([126, 126])
torch.Size([18, 7, 7]) => torch.Size([126, 7])
torch.Size([126, 126]) + torch.Size([126, 7]) = torch.Size([126, 133])


In [25]:
lower_left = dnorm_wrt_dC.reshape(nao_norb_product, number_of_occupied_orbitals).t()
print(dnorm_wrt_dC.size(), "=>", lower_left.size())
lower = torch.cat((lower_left, dnorm_wrt_depsilon), 1)
print(lower_left.size(), "+", dnorm_wrt_depsilon.size(), "=", lower.size())

torch.Size([18, 7, 7]) => torch.Size([7, 126])
torch.Size([7, 126]) + torch.Size([7, 7]) = torch.Size([7, 133])


In [26]:
dY_wrt_dX = torch.cat((upper, lower), 0)
print(upper.size(), "+", lower.size(), "=", dY_wrt_dX.size())

torch.Size([126, 133]) + torch.Size([7, 133]) = torch.Size([133, 133])


Check it:

In [27]:
for i in range(number_of_all_orbitals + 1):
    for a in range(number_of_occupied_orbitals):
        for j in range(number_of_all_orbitals + 1):
            for c in range(number_of_occupied_orbitals):
                index = (number_of_occupied_orbitals * i + a, number_of_occupied_orbitals * j + c)
                if (i < number_of_all_orbitals):
                    if (j < number_of_all_orbitals):
                        assert(dY_wrt_dX[index[0]][index[1]] == dRoothan_wrt_dC[i][a][j][c]),(i,a,j,c)
                    else:
                        assert(dY_wrt_dX[index[0]][index[1]] == dRoothan_wrt_depsilon[i][a][c]),(i,a,c)
                else:
                    if (j < number_of_all_orbitals):
                        assert(dY_wrt_dX[index[0]][index[1]] == dnorm_wrt_dC[j][a][c]),(j,a,c)
                    else:
                        assert(dY_wrt_dX[index[0]][index[1]] == dnorm_wrt_depsilon[a][c]),(a,c)

## Find inverse matrix of $\frac{\partial \vec{Y}}{\partial \vec{X}}$

In [28]:
dY_wrt_dX_inverse = torch.linalg.inv(dY_wrt_dX)
print(dY_wrt_dX_inverse.size())

torch.Size([133, 133])


## Find adjoint vector:

In [29]:
adjoint_vector = torch.matmul(dY_wrt_dX_inverse, dE_wrt_dX)
adjoint_vector.size()

torch.Size([133])

# Calculating derivatives with respect to parameters

## Derivative from energy: $\frac{\partial E[\rho](\vec{\theta})}{\partial \vec{\theta}}$

$$ \frac{\partial E[\rho](\vec{\theta})}{\partial \vec{\theta}} = 
 \int  \frac{\partial \rho(\vec{r})\epsilon_{XC}[\rho](\vec{r};\;\vec{\theta})}{\partial \vec{\theta}}d\vec{r} 
$$
so we just can put $\frac{\partial \rho(\vec{r})\epsilon_{XC}[\rho](\vec{r};\;\vec{\theta})}{\partial \vec{\theta}}$ instead of $\rho(\vec{r})\epsilon_{XC}[\rho](\vec{r};\;\vec{\theta})$ to DQC calculation.

In [35]:
dE_wrt_dtheta = adapter.get_derivative_of_exc_wrt_theta()
dE_wrt_dtheta.size()

torch.Size([2])

## Derivative from normalization equations:

$$\frac{\partial r_{a}(\textbf{C})}{\partial\vec{\theta}}= 0$$ so

In [30]:
number_of_parameters = adapter.get_number_of_parameters()
dnorm_wrt_dtheta = torch.zeros(number_of_occupied_orbitals, number_of_parameters)
dnorm_wrt_dtheta.size()

torch.Size([7, 2])

## Derivative from Roothan equations: 

$$\frac{\partial r_{ia}(\textbf{C};\;\vec{\theta})}{\partial \vec{\theta}} =
\sum_j C_{aj}\int b_i(\vec{r}) \frac{\partial V_{XC}[\rho](\vec{r};\;\vec{\theta})}{\partial \vec{\theta}} b_j(\vec{r})d\vec{r}$$

As we know, $$V_{XC}[\rho](\vec{r};\;\vec{\theta}) = \frac{\partial E_{XC}[\rho]}{\partial\rho(\vec{r})} = \frac{\partial \rho(\vec{r})\epsilon_{XC}[\rho](\vec{r};\;\vec{\theta})}{\partial\rho(\vec{r})}$$
So

$$\frac{\partial r_{ia}(\textbf{C};\;\vec{\theta})}{\partial \vec{\theta}} =
\sum_j C_{aj}\int b_i(\vec{r}) \frac{\partial\rho(\vec{r})\epsilon_{XC}[\rho](\vec{r};\;\vec{\theta})}{\partial \vec{\theta}\partial\rho(\vec{r})} b_j(\vec{r})d\vec{r} = 
\sum_j C_{aj}\int b_i(\vec{r}) \frac{ \frac{\partial \rho(\vec{r})\epsilon_{XC}[\rho](\vec{r};\;\vec{\theta})}{\partial \vec{\theta}}}{\partial\rho(\vec{r})} b_j(\vec{r})d\vec{r}
$$
It means, we can use $\frac{\partial \rho(\vec{r})\epsilon_{XC}[\rho](\vec{r};\;\vec{\theta})}{\partial \vec{\theta}}$ instead of $\rho(\vec{r})\epsilon_{XC}[\rho](\vec{r};\;\vec{\theta})$ in DQC function`get_vxc()`and get suitable result. This function takes densinfo and takes functional derivative with respect to density.

In [32]:
dvxc_wrt_dtheta = adapter.get_derivative_of_vxc_wrt_theta()
print(dvxc_wrt_dtheta.size())

torch.Size([18, 18, 2])


In [33]:
dRoothan_wrt_dtheta = torch.einsum("ijt,ja->iat", dvxc_wrt_dtheta, coefficients)
print(dRoothan_wrt_dtheta.size())

torch.Size([18, 7, 2])


# Concatenate derivatives with respect to parameters

In [42]:
dY_wrt_dtheta = dRoothan_wrt_dtheta.reshape(nao_norb_product, number_of_parameters)
print(dRoothan_wrt_dtheta.size(), "=>", dY_wrt_dtheta.size())
dY_wrt_dtheta = torch.cat((dY_wrt_dtheta, dnorm_wrt_dtheta), 0)
print(dY_wrt_dtheta.size())

torch.Size([18, 7, 2]) => torch.Size([126, 2])
torch.Size([133, 2])


# Total derivative:

In [44]:
total_dE_wrt_dtheta = dE_wrt_dtheta + torch.matmul(adjoint_vector, dY_wrt_dtheta)
print(total_dE_wrt_dtheta)

tensor([14.9943, 25.5368], dtype=torch.float64, grad_fn=<AddBackward0>)


In [45]:
grad_a, grad_p = torch.autograd.grad(ene, (a_par, p_par))
print(grad_a, grad_p)

tensor(14.9891, dtype=torch.float64) tensor(25.6106, dtype=torch.float64)
