# Testing of ISDF Expansion W.r.t. Pair Product Matrix

Testing Summary

* Set benzene up again: Compute the grid and wave functions
* Construct product basis matrix: L.H.S.
* Construct the interpolation vectors
* Implement Eta @ (Phi Psi), where Eta has shape (N_grid, N_interp) and (Phi Psi) has shape (N_interp, Ne^2): R.H.S.
* Numerically compare to exact solution (Compare L.H.S. to R.H.S.)
* Plot approximate against exact solution

In [1]:
"""Build benzene molecule with a GTO basis (def2-SVP), using PySCF
"""
%load_ext autoreload
%autoreload 2

from pyscf import gto, dft
from rdkit import Chem
from isdf_prototypes.helpers import pyscf_occupied_mos

# Length conversion factor: multiply a length in bohr by this to get angstrom
bohr_to_ang =  0.529177249

# Benzene geometry: one "symbol x y z" row per atom (6 C followed by 6 H).
# NOTE(review): no unit is set on `mol` below, so these are presumably read in
# PySCF's default unit (angstrom) - confirm
benzene_coordinates = """
 C         -0.65914       -1.21034        3.98683
 C          0.73798       -1.21034        4.02059
 C         -1.35771       -0.00006        3.96990
 C          1.43653       -0.00004        4.03741
 C         -0.65915        1.21024        3.98685
 C          0.73797        1.21024        4.02061
 H         -1.20447       -2.15520        3.97369
 H          1.28332       -2.15517        4.03382
 H         -2.44839       -0.00006        3.94342
 H          2.52722       -0.00004        4.06369
 H         -1.20448        2.15509        3.97373
 H          1.28330        2.15508        4.03386
"""

# PYSCF Molecule
mol = gto.Mole()
mol.atom = benzene_coordinates
mol.basis = 'def2-SVP'
# build() must be called after atom/basis are assigned and before `mol` is used
mol.build()

# RDKIT molecule from SMILES data
# NOTE(review): `rdkit_mol` is not referenced again in the cells visible here
rdkit_mol = Chem.MolFromSmiles("c1ccccc1")
rdkit_mol = Chem.AddHs(rdkit_mol)
# mol.atom_coords(unit='angstom'), symbols=[mol.atom_symbol(i) for i in range(12)]

# Solve SCF for restricted KS-LDA
mf = dft.RKS(mol)
mf.kernel()
# Number of occupied molecular orbitals, as reported by the project helper
n_occ = pyscf_occupied_mos(mf)
# Sanity check: later cells size their arrays with n_occ
assert n_occ == 21, "Occupied states for benzene, with this basis is 21"

converged SCF energy = -229.930646528207


In [2]:
"""Define real-space grid
"""
from pathlib import Path

import numpy as np
from pyscf.tools import cubegen

from isdf_prototypes.visualise import write_xyz

# Root for outputs
output_root = Path('../outputs/isdf_product_expansion')
# Cube files are written below this directory by the cells that follow, so
# make sure it exists before anything tries to open a file inside it.
output_root.mkdir(parents=True, exist_ok=True)

# Grid/cube settings: number of grid points along each axis of the cube
nx, ny, nz = 10, 10, 10
n_total = np.prod([nx, ny, nz])

# Generate the real-space grid
cube_grid = cubegen.Cube(mol, nx, ny, nz)
grid_points = cube_grid.get_coords()
# One row of Cartesian coordinates per grid point
assert grid_points.shape == (n_total, 3)

# Output full grid and clusters to .xyz with dummy species
# grid_xyz = write_xyz(['Sr']*n_total, grid_points * bohr_to_ang)
# with open(output_root / 'grid.xyz', "w") as fid:
#     fid.write(grid_xyz)




In [3]:
"""Occupied Wavefunctions
"""
# Generate MOs on real-space grid
# `wfs` holds one row per grid point and one column per occupied orbital
wfs = np.empty(shape=(n_total, n_occ))
# NOTE(review): assumes the occupied orbitals are the first n_occ columns of
# mf.mo_coeff - confirm against pyscf_occupied_mos
for i in range(n_occ):
    # Each orbital is given its own cube file name under output_root
    cube_file = output_root / f'benzene_mo{i}.cube'
    # pyscf expects a string for fname
    molecular_orbital = cubegen.orbital(mol, cube_file.as_posix(), mf.mo_coeff[:, i], nx=nx, ny=ny, nz=nz)
    # Flatten the values returned for the grid into column i of `wfs`
    wfs[:, i] = molecular_orbital.reshape(-1)


In [4]:
"""Export various functions on the grid

Note, C to fortran indexing is dealt with on the fortran side.
Might have been able to simplify by exporting using `asfortranarray`
https://numpy.org/doc/stable/reference/generated/numpy.asfortranarray.html
"""
# 1-electron Density matrix
dm = mf.make_rdm1()
cube_file = output_root / f'density.cube'
# Density on the same nx x ny x nz grid, flattened to one value per grid point
rho = cubegen.density(mol, cube_file.as_posix(), dm, nx=nx, ny=ny, nz=nz)
rho = rho.reshape(-1)

# NOTE(review): `shape_string` is defined here but not used in the cells visible here
shape_string = lambda array: " ".join(str(x) for x in array.shape)
# Plain-text exports. The file names are relative, so they are written to the
# current working directory rather than to `output_root`.
# Each header records the grid dimensions (plus the trailing array dimension
# for grid and wfs).
# NOTE(review): the wfs header contains a comma and a trailing space, unlike
# the other two headers - confirm the reader expects that
np.savetxt('grid.out', grid_points, header=f'{nx} {ny} {nz} {grid_points.shape[-1]}')
np.savetxt('wfs.out', wfs, header=f'{nx} {ny} {nz}, {wfs.shape[-1]} ')
np.savetxt('density.out', rho, header=f'{nx} {ny} {nz}')


# print("Box lengths", cube_grid.box[0, 0], cube_grid.box[1, 1], cube_grid.box[2, 2])
# print("Origin", cube_grid.boxorig)
#
# ir = 0
# for i  in range(0, nx):
#     for j  in range(0, ny):
#         for k  in range(0, nz):
#             # Haven't got this correct
#             # my_grid = (np.array([i, j, k]) * np.diag(cube_grid.box)) + cube_grid.boxorig
#             # print(my_grid, grid_points[ir, :])
#             print(grid_points[ir, :])
#             ir += 1



In [20]:
""" L.H.S. Full product basis matrix
"""
from isdf_prototypes.math_ops import face_splitting_product


# Construct product basis matrix
# `z` is the exact (L.H.S.) reference that the ISDF approximation is compared against
z = face_splitting_product(wfs)
# One row per grid point, one column per ordered pair of occupied orbitals
assert z.shape == (n_total, n_occ**2)


In [23]:
from typing import Tuple

""" R.H.S.: Interpolation points and vectors, and approximate product matrix
"""
from isdf_prototypes.interpolation_points_via_qr import randomly_sample_product_matrix, interpolation_points_via_qrpivot
from isdf_prototypes.isdf_vectors import construct_interpolation_vectors_matrix


def compute_approximate_product_matrix(wfs, n_interp) -> Tuple[np.ndarray, int]:
    r"""Compute an approximate product matrix using ISDF interpolation vectors.

    The pair products are expanded as

        z_{\text{isdf}} = \sum_\mu^{N_\mu} \zeta_{\mu}(\mathbf{r}) \phi_i(\mathbf{r}_\mu) \phi_j(\mathbf{r}_\mu)

    which can be represented as the matrix equation:

        Z_{\text{isdf}} = \Zeta (\Phi \bullet \Psi),

    where (\Phi \bullet \Psi) is the face-splitting product. Here both factors
    are the rows of ``wfs`` at the interpolation points.

    The docstring is a raw string so that the LaTeX backslashes are kept
    verbatim instead of being read as escape sequences.

    :param wfs: Wave functions on the grid, with one row per grid point and
        one column per state.
    :param n_interp: Requested number of interpolation points. The number
        actually used is taken from the sampled product matrix and may differ.
    :return: Tuple ``(z_isdf, n_interp)``, where ``z_isdf`` has shape
        ``(wfs.shape[0], wfs.shape[1]**2)`` and ``n_interp`` is the number of
        interpolation points actually used.
    :raises AssertionError: If an intermediate array does not have the
        expected shape.
    """
    # Avoid using global data
    total_grid_size = wfs.shape[0]
    n_products = wfs.shape[1]**2

    # Construct the interpolation points
    # The seed is fixed so that repeated calls give the same sampling
    z_sampled = randomly_sample_product_matrix(wfs, n_interp=n_interp, random_seed=42)
    assert z_sampled.shape[0] == total_grid_size

    # Update number of interpolation points: the sampler decides how many
    # columns it returns, so use its answer rather than the requested value
    n_interp = z_sampled.shape[1]

    indices = interpolation_points_via_qrpivot(z_sampled)
    assert len(indices) == n_interp

    # Construct the interpolation points vectors
    zeta = construct_interpolation_vectors_matrix(wfs, indices)
    assert zeta.shape == (total_grid_size, n_interp)

    # Product basis defined on the interpolation grid
    z_interp = face_splitting_product(wfs[indices, :])
    assert z_interp.shape == (n_interp, n_products)

    # ISDF approximation to the product basis
    z_isdf = zeta @ z_interp
    assert z_isdf.shape == (total_grid_size, n_products)

    return z_isdf, n_interp
    

def error_l2(f1, f2, grid) -> dict:
    """Summarise the per-column L2 error between two sets of grid functions.

    Each column of ``f1`` and ``f2`` is treated as one function sampled on the
    grid. The squared difference is summed over the grid points (axis 0) and
    weighted with the volume element of ``grid``.

    :param f1: First set of functions, with grid points along axis 0.
    :param f2: Second set of functions, same shape as ``f1``.
    :param grid: Object providing ``get_volume_element()``.
    :return: Dict with keys ``'min'``, ``'max'`` and ``'mean'``, taken over
        the per-column errors.
    """
    volume_element = grid.get_volume_element()
    assert volume_element > 0.0, "Must have a finite volume element"

    residual = f1 - f2
    # Discretised integral of the squared residual over the grid
    squared_norms = np.square(residual).sum(axis=0) * volume_element

    # The paper uses the squared quantity, but the author's reading is that
    # this should be an RMSE-like measure, hence the square root.
    # NOTE: this changes the error completely. With the root, roughly 100
    # interpolation points are needed before the max error is reasonable.
    # For 100 points the plot comparisons further down look entirely
    # consistent, though.
    norms = np.sqrt(squared_norms)

    return {'min': norms.min(), 'max': norms.max(), 'mean': norms.mean()}
    

def relative_error_l2(f1, f2, grid):
    """Mean L2 error of ``f2`` relative to the mean L2 norm of ``f1``.

    The numerator is the ``'mean'`` entry returned by ``error_l2``. The
    denominator is the mean over columns of the discretised L2 norm of ``f1``.

    :param f1:  Target function
    :param f2:  Approximate function
    :param grid: system grid
    :return: Ratio of the mean error to the mean norm of the target.
    """
    mean_error = error_l2(f1, f2, grid)['mean']

    # Volume element required in discretisation of <f1, f1>
    volume_element = grid.get_volume_element()
    target_norms = np.sqrt(volume_element * np.sum(np.square(f1), axis=0))

    return mean_error / np.mean(target_norms)


# This is a pain. Leave for now. In Octopus, will probably do the inner sum with the Poisson solver anyway
# def coulomb_error(product, approx_product, cube_grid):
#     delta_product = product - approx_product
#     
#     r = cube_grid.get_coords()
#     npoints = cube_grid.get_ngrids()
#     dV = cube_grid.get_volume_element()
# 
#     
#     for ij in range(0, nproducts):
#         for ir1 in range(0, npoints):
#             delta_product_ir1 = delta_product[ir2, ij]
#             summed = 0.0
#             for ir2 in range(0, npoints):
#                 # Ideally want  np.norm(r[ir1, :] - r[ir2, :]) to equal 1 for ir1 = ir2  
#                 summed += delta_product_ir1 * delta_product[ir2, ij] / np.norm(r[ir1, :] - r[ir2, :])
#             # TODO Add: - delta_product[ir1, ij] + (4 * np.pi * dV)
#             potential[ir1] = summed - delta_product[ir1, ij] * dV    

    

In [29]:
# Requested numbers of interpolation vectors to scan over
n_interpolation_vectors = [10, 25, 50, 75, 100, 200, 300]

for n in n_interpolation_vectors:
    # `n_interp` is the number of interpolation points actually used, which
    # can differ from the requested `n`.
    # `z_isdf` stays defined after the loop, so the cube-writing cell further
    # down sees whichever approximation was computed last.
    z_isdf, n_interp = compute_approximate_product_matrix(wfs, n)
    # Absolute (min/max/mean) and relative L2 error against the exact product matrix `z`
    print(n_interp, error_l2(z, z_isdf, cube_grid), "rel. 2-error", relative_error_l2(z, z_isdf, cube_grid))


Sizes: (1000, 9) (1000,) 9
9 {'min': 9.345572027838745e-05, 'max': 0.00251585330283558, 'mean': 0.0010730809703227925} rel. 2-error 0.7328820171707147
Sizes: (1000, 25) (1000,) 25
25 {'min': 3.8039235072977515e-05, 'max': 0.002069107549818602, 'mean': 0.0006128654717785342} rel. 2-error 0.41856867807116455
Sizes: (1000, 49) (1000,) 49
49 {'min': 1.4698690853164507e-05, 'max': 0.0010400652001405033, 'mean': 0.00025373877194010184} rel. 2-error 0.1732959470504291
Sizes: (1000, 81) (1000,) 81
81 {'min': 3.4549130138913135e-06, 'max': 0.00025182121449866877, 'mean': 2.560415693257912e-05} rel. 2-error 0.0174868688381078
Sizes: (1000, 100) (1000,) 100
100 {'min': 1.300363082060416e-06, 'max': 3.360789700461351e-05, 'mean': 6.212559011270438e-06} rel. 2-error 0.004242990888751269
Sizes: (1000, 196) (1000,) 196


AssertionError: inv_cct computed with inv should be symmetric

In [12]:
# Plot a small selection pair product states using PYSCF's Cube functionality
# First and last columns of the product matrix
states = [0, n_occ**2 - 1]

for ij in states:
    # Exact product state
    fname = output_root / f'z_{ij}.cube'
    # Each column is reshaped from a flat vector back to the (nx, ny, nz) grid before writing
    cube_grid.write(field=z[:, ij].reshape(nx, ny, nz) , fname=fname.as_posix())
    # Approximate product state
    # `z_isdf` is whatever the most recently executed ISDF computation left behind
    fname = output_root / f'zisdf_{ij}.cube'
    cube_grid.write(field=z_isdf[:, ij].reshape(nx, ny, nz) , fname=fname.as_posix())


I think these results converge so quickly because the system is a small molecule in a box with no periodic boundary conditions (PBCs), and most of the charge is centred around the molecule.


In [26]:
# Grid spacings in bohr. Copied from Cube.write() method
# Row j of cube_grid.box is scaled by the second grid coordinate along axis j
# (xs[1], ys[1], zs[1]).
# NOTE(review): presumably xs[0] == 0, so that xs[1] is the step between
# neighbouring points - confirm
spacing = (cube_grid.box.T * [cube_grid.xs[1], cube_grid.ys[1], cube_grid.zs[1]]).T
print(spacing)


[[1.71139336 0.         0.        ]
 [0.         1.5716964  0.        ]
 [0.         0.         0.69191971]]
