# Testing of ISDF Expansion W.r.t. Pair Product Matrix

Testing Summary

* Set benzene up again: Compute the grid and wave functions
* Construct product basis matrix: L.H.S.
* Construct the interpolation vectors
* Implement Eta @ (Phi Psi), where Eta has shape(N_grid, N_interp) and PhiPsi has shape (N_iterp, Ne^2): R.H.S
* Numerically compare to exact solution (Compare L.H.S. to R.H.S.)
* Plot approximate against exact solution

In [2]:
"""Build Benzene molecule with a minimal GTO basis, usign PYSCF
"""
%load_ext autoreload
%autoreload 2

from pyscf import gto, dft
from rdkit import Chem

bohr_to_ang =  0.529177249

benzene_coordinates = """
 C         -0.65914       -1.21034        3.98683
 C          0.73798       -1.21034        4.02059
 C         -1.35771       -0.00006        3.96990
 C          1.43653       -0.00004        4.03741
 C         -0.65915        1.21024        3.98685
 C          0.73797        1.21024        4.02061
 H         -1.20447       -2.15520        3.97369
 H          1.28332       -2.15517        4.03382
 H         -2.44839       -0.00006        3.94342
 H          2.52722       -0.00004        4.06369
 H         -1.20448        2.15509        3.97373
 H          1.28330        2.15508        4.03386
"""

# PYSCF Molecule
mol = gto.Mole()
mol.atom = benzene_coordinates
mol.basis = 'def2-SVP'
mol.build()

# RDKIT molecule from SMILES data
rdkit_mol = Chem.MolFromSmiles("c1ccccc1")
rdkit_mol = Chem.AddHs(rdkit_mol)
# mol.atom_coords(unit='angstom'), symbols=[mol.atom_symbol(i) for i in range(12)]

# Solve SCF for restricted KS-LDA
mf = dft.RKS(mol)
mf.kernel()
# Occupied states for benzene, with this basis is 22
n_occ = 22

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
converged SCF energy = -229.930646528208


In [3]:
"""Define real-space grid
"""
from pathlib import Path

import numpy as np
from pyscf.tools import cubegen

from isdf_prototypes.visualise import write_xyz

# Root for outputs
output_root = Path('../outputs/isdf_product_expansion')

# Grid/cube settings
nx, ny, nz = 10, 10, 10
n_total = np.prod([nx, ny, nz])

# Generate the real-space grid
cube_grid = cubegen.Cube(mol, nx, ny, nz)
grid_points = cube_grid.get_coords()
assert grid_points.shape == (n_total, 3)

# Output full grid and clusters to .xyz with dummy species
# grid_xyz = write_xyz(['Sr']*n_total, grid_points * bohr_to_ang)
# with open(output_root / 'grid.xyz', "w") as fid:
#     fid.write(grid_xyz)

In [5]:
"""Occupied Wavefunctions
"""
# Generate MOs on real-space grid
wfs = np.empty(shape=(n_total, n_occ))
for i in range(n_occ):
    cube_file = output_root / f'benzene_mo{i}.cube'
    # pyscf expects a string for fname
    molecular_orbital = cubegen.orbital(mol, cube_file.as_posix(), mf.mo_coeff[:, i], nx=nx, ny=ny, nz=nz)
    wfs[:, i] = molecular_orbital.reshape(-1)


In [6]:
""" L.H.S. Full product basis matrix
"""
from isdf_prototypes.math_ops import face_splitting_product


# Construct product basis matrix
z = face_splitting_product(wfs)
assert z.shape == (n_total, n_occ**2)


In [21]:
from typing import Tuple

""" R.H.S.: Interpolation points and vectors, and approximate product matrix
"""
from isdf_prototypes.interpolation_points_via_qr import randomly_sample_product_matrix, interpolation_points_via_qrpivot
from isdf_prototypes.isdf_vectors import construct_interpolation_vectors_matrix


def compute_approximate_product_matrix(wfs, n_interp) -> Tuple[np.ndarray, int]:
    """ Compute an approximate product matrix using ISDF interpolation vectors
     
     z_{\text{isdf}} = \sum_\mu^{N_\mu} \zeta_{\mu}(\mathbf{r}) \phi_i(\mathbf{r}_\mu) \phi_j(\mathbf{r}_\mu)

    which can be represented as the matrix equation:
    
    Z_{\text{isdf}} = \Zeta (Phi \bullet \Psi), 
    
    where (Phi \bullet \Psi) is the face-splitting product.
    """
    # Avoid using global data
    total_grid_size = wfs.shape[0]
    n_products = wfs.shape[1]**2

    # Construct the interpolation points
    z_sampled = randomly_sample_product_matrix(wfs, n_interp=n_interp, random_seed=42)
    assert z_sampled.shape[0] == total_grid_size
    
    # Update number of interpolation points
    n_interp = z_sampled.shape[1]
    
    indices = interpolation_points_via_qrpivot(z_sampled)
    assert len(indices) == n_interp
    
    # Construct the interpolation points vectors
    zeta = construct_interpolation_vectors_matrix(wfs, indices)
    assert zeta.shape == (total_grid_size, n_interp)
    
    # Product basis defined on the interpolation grid
    z_interp = face_splitting_product(wfs[indices, :])
    assert z_interp.shape == (n_interp, n_products)
    
    # ISDF approximation to the product basis
    z_isdf = zeta @ z_interp
    assert z_isdf.shape == (total_grid_size, n_products)
    
    return z_isdf, n_interp
    

def error_l2(f1, f2, grid) -> dict:
    """  Error using L2 metric.
    
    :param f1: 
    :param f2: 
    :param grid: 
    :return: 
    """
    dV = grid.get_volume_element()
    assert dV > 0.0, "Must have a finite volume element"
    diff = f1 - f2
    # Integrate over grid points
    err_ij = np.sum(diff * diff, axis=0) * dV
    err_ij = np.square(err_ij)
    err = {'min': np.amin(err_ij), 'max': np.amax(err_ij), 'mean': np.mean(err_ij)}

    return err
    

In [23]:
n_interpolation_vectors = [10, 25, 50, 75, 100]

for n in n_interpolation_vectors:
    z_isdf, n_interp = compute_approximate_product_matrix(wfs, n)
    print(n_interp, error_l2(z, z_isdf, cube_grid))


9 {'min': 2.1190150945886306e-16, 'max': 3.3739849340340795e-11, 'mean': 3.579738461276301e-12}
25 {'min': 3.663177358038371e-18, 'max': 1.3619696057732124e-11, 'mean': 8.688034912593237e-13}
49 {'min': 9.497317134531506e-20, 'max': 3.6495170558354045e-12, 'mean': 8.248621375322717e-14}
81 {'min': 1.3054774330992181e-22, 'max': 6.058266954241666e-15, 'mean': 2.4328414635955734e-17}
100 {'min': 2.2198566957729384e-23, 'max': 7.338210849610755e-18, 'mean': 3.25109166923599e-20}


In [19]:
# Plot a small selection pair product states using PYSCF's Cube functionality
states = [0, n_occ**2 - 1]

for ij in states:
    # Exact product state
    fname = output_root / f'z_{ij}'
    cube_grid.write(field=z[:, ij].reshape(nx, ny, nz) , fname=fname.as_posix())
    # Approximate product state
    fname = output_root / f'zisdf_{ij}'
    cube_grid.write(field=z_isdf[:, ij].reshape(nx, ny, nz) , fname=fname.as_posix())


I think these results converge so quickly because one has a small molecule in a box, with no PBCs, and most of the charge centred around the molecule.
