# Find interpolation points by subsampling the product basis and performing a pivoted QR decomposition

In [72]:
"""Build Benzene molecule with a minimal GTO basis, usign PYSCF
"""
%load_ext autoreload
%autoreload 2

from pyscf import gto, dft
from rdkit import Chem

bohr_to_ang =  0.529177249

benzene_coordinates = """
 C         -0.65914       -1.21034        3.98683
 C          0.73798       -1.21034        4.02059
 C         -1.35771       -0.00006        3.96990
 C          1.43653       -0.00004        4.03741
 C         -0.65915        1.21024        3.98685
 C          0.73797        1.21024        4.02061
 H         -1.20447       -2.15520        3.97369
 H          1.28332       -2.15517        4.03382
 H         -2.44839       -0.00006        3.94342
 H          2.52722       -0.00004        4.06369
 H         -1.20448        2.15509        3.97373
 H          1.28330        2.15508        4.03386
"""

# PYSCF Molecule
mol = gto.Mole()
mol.atom = benzene_coordinates
mol.basis = 'def2-SVP'
mol.build()

# RDKIT molecule from SMILES data
rdkit_mol = Chem.MolFromSmiles("c1ccccc1")
rdkit_mol = Chem.AddHs(rdkit_mol)
# mol.atom_coords(unit='angstom'), symbols=[mol.atom_symbol(i) for i in range(12)]

# Solve SCF for restricted KS-LDA
mf = dft.RKS(mol)
mf.kernel()
# Occupied states for benzene, with this basis is 22
n_occ = 22

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
converged SCF energy = -229.930646528208


In [41]:
"""Define real-space grid
"""
from pathlib import Path

import numpy as np
from pyscf.tools import cubegen

from isdf_prototypes.visualise import write_xyz


# Root for outputs
output_root = Path('../outputs/points_via_qr')

# Grid/cube settings
nx, ny, nz = 10, 10, 10
n_total = np.prod([nx, ny, nz])

# Generate the real-space grid
cube_grid = cubegen.Cube(mol, nx, ny, nz)
grid_points = cube_grid.get_coords()
assert grid_points.shape == (n_total, 3)

# Output full grid and clusters to .xyz with dummy species
grid_xyz = write_xyz(['Sr']*n_total, grid_points * bohr_to_ang)
with open(output_root / 'grid.xyz', "w") as fid:
    fid.write(grid_xyz)

In [42]:
"""Build the electron density
"""
from pathlib import Path

cube_file = output_root / 'benzene_density.cube'

# 1-electron Density matrix
dm = mf.make_rdm1()
rho = cubegen.density(mol, cube_file.as_posix(), dm, nx=nx, ny=ny, nz=nz)
rho = rho.reshape(-1)
assert rho.shape == (n_total,)

In [43]:
"""KS molecular orbitals
"""
# Generate MOs on real-space grid
wfs = np.empty(shape=(n_total, n_occ))
for i in range(n_occ):
    cube_file = output_root / f'benzene_mo{i}.cube'
    # pyscf expects a string for fname
    molecular_orbital = cubegen.orbital(mol, cube_file.as_posix(), mf.mo_coeff[:, i], nx=nx, ny=ny, nz=nz)
    wfs[:, i] = molecular_orbital.reshape(-1)


In [45]:
""" Interpolation point selection
"""
from isdf_prototypes.interpolation_points_via_qr import randomly_sample_product_matrix, interpolation_points_via_qrpivot

# Create sub-sampled pair-product Z matrix
n_interp = 100  # i.e. 10% of the grid points for a 10 x 10 x 10 grid
print(f'Requested {n_interp} interpolation points')

# Fixed seed so the random sub-sampling is reproducible between runs
z_sampled = randomly_sample_product_matrix(wfs, n_interp, random_seed=42)
assert z_sampled.shape[0] == n_total, 'Number of grid points'

# Update number of interpolation points: the sampling routine determines the
# final number of columns, which may differ from the request
n_interp = z_sampled.shape[1]
print(f'Returned {n_interp} interpolation points')

# Use QR decomposition with pivoting to find interpolation points
pivot_indices = interpolation_points_via_qrpivot(z_sampled)
print("Row indices to be used as interpolation points:")
print(pivot_indices)

# Export interpolation grid points.
# One dummy species label per exported point (not one per grid point), so that
# the number of labels matches the number of positions
interp_grid_xyz = write_xyz(['U'] * len(pivot_indices), grid_points[pivot_indices, :] * bohr_to_ang)
with open(output_root / 'interpolation_points.xyz', "w") as fid:
    fid.write(interp_grid_xyz)


Requested 100 interpolation points
Returned 100 interpolation points
Row indices to be used as interpolation points:
[243 244 245 246 254 255 256 324 325 326 334 335 342 343 344 345 346 352
 353 354 355 356 363 364 365 366 374 375 376 423 424 425 433 434 435 436
 443 444 445 446 453 454 455 456 463 464 465 466 474 475 524 525 532 533
 534 535 536 544 545 546 553 554 555 556 563 564 565 566 567 575 623 624
 625 633 634 635 636 643 644 645 646 647 653 654 655 656 657 663 664 665
 666 673 674 675 744 745 746 754 755 756]


In [73]:
# In-notebook 3D view: density isosurface, full grid and interpolation points
import py3Dmol

from isdf_prototypes.visualise import visualise_mo

# Initialise
view = py3Dmol.view(width=400, height=400)

# Atomic structure + density isosurface, read back from the cube file on disk
cube_data = (output_root / 'benzene_density.cube').read_text()
view = visualise_mo(cube_data, view, isoval=0.001, mol=rdkit_mol)

# Full grid: small yellow markers, coordinates scaled by the Bohr -> Angstrom factor
for x, y, z in grid_points * bohr_to_ang:
    view.addSphere({'center': {'x': x, 'y': y, 'z': z}, 'radius': 0.05, 'color': 'yellow'})

# Interpolation points: larger red markers drawn on top of the grid
for x, y, z in grid_points[pivot_indices, :] * bohr_to_ang:
    view.addSphere({'center': {'x': x, 'y': y, 'z': z}, 'radius': 0.2, 'color': 'red'})

# view.setStyle({'sphere': {}})
view.show()
