# ISDF With More Granular Routine Calls

Expose quantities for outputting:
* Density matrix, $P_{rr'}$ with shape $(N_p, N_p)$
* Interpolation indices 
* Contraction, defined by the element-wise (Hadamard) product: $ZC^T = P_{r, \mu} \odot P_{r, \mu}$, with shape $(N_p, N_\mu)$
* Contraction, defined by the element-wise (Hadamard) product: $CC^T = P_{\mu, \nu} \odot P_{\mu, \nu}$, with shape $(N_\mu, N_\mu)$
    * By definition,  $CC^T$ is just the selection of $N_\mu$ rows of $ZC^T$
* Inverse $(CC^T)^{-1}$ (evaluated numerically as a pseudo-inverse)
* Matrix multiplication (contraction): $\Theta = (ZC^T) (CC^T)^{-1}$ with shape $(N_p, N_\mu)$

In [6]:
""" Imports
"""
import os
from pathlib import Path
import numpy as np
from string import Template

import py3Dmol
from pyscf import dft
from pyscf.tools import cubegen
# from pyscf import lib

from isdf_prototypes.helpers import (smiles_to_rdkitmol, pyscf_molecule, pyscf_density,
                                     pyscf_grid,add_grid_to_view, discretise_continuous_values,
                                     pyscf_molecular_orbitals, duplicate_indices,
                                     compute_approximate_product_matrix,
                                     mean_squared_error_regular_grid,
                                     find_interpolation_points_via_kmeans)
from isdf_prototypes.visualise import visualise_mo
from isdf_prototypes.math_ops import face_splitting_product

# Project root: two directory levels above the current working directory
project_root = Path.cwd().parent.parent

# Threading note: PYSCF warns that it is not running with threads. The
# `lib.num_threads(1)` call (and the matching `from pyscf import lib` import)
# is intentionally left disabled.

# Notebook-wide settings
visualise = False        # Toggle the py3Dmol rendering in the cells below
points = [10, 10, 10]    # Number of grid points per dimension
n_occ = 21               # Number of states in the batched wave functions


In [8]:
""" Restricted KS-DFT reference calculation for benzene (def2-SVP basis)
* Converge the SCF to obtain a batch of KS states
* Build the real-space grid, then evaluate the density and the wave functions on it
"""
# SCF calculation for the molecule read from the project inputs
mol_benzene = pyscf_molecule(project_root / 'inputs/benzene.xyz', 'def2-SVP')
mf_benzene = dft.RKS(mol_benzene)
mf_benzene.kernel()

# RDKit molecule (only consumed by the optional visualisation below)
rdmol_benzene = smiles_to_rdkitmol('c1ccccc1')

# Density and grid, both defined by the same `points` setting
cube_file = '../../outputs/benzene_density.cube'
rho_benzene = pyscf_density(mf_benzene, points, cube_file)
grid_benzene = pyscf_grid(mol_benzene, points)

# Optional rendering of the density isosurface with the grid points overlaid
if visualise:
    cube_data = Path(cube_file).read_text()
    view_benzene = py3Dmol.view(width=400, height=400)
    view_benzene = visualise_mo(cube_data, view_benzene, isoval=0.0001, mol=rdmol_benzene)
    view_benzene = add_grid_to_view(view_benzene, grid_benzene,
                                    radius=0.1, color='blue', alpha=0.6)
    view_benzene.show()

# Wave functions on the grid; the template is handed to the helper for the per-MO cube files
cube_template = Template('../../outputs/benzene_$i.cube')
wfs = pyscf_molecular_orbitals(mf_benzene, points, cube_template)

# Expected dimensions: total grid points and number of pair products
n_total = np.prod(points)
n_product = n_occ ** 2
assert wfs.shape == (n_total, n_occ)


converged SCF energy = -229.930646528208


In [9]:
""" Centroid/interpolation indices

* Find `n_clusters` centroids with k-means, using the grid and the density
* Snap the continuous centroids onto grid points and record their grid indices
* Drop centroids that were snapped onto the same grid point
* Dump the surviving points and indices to `mocked_data`
"""

# NOTE(review): random_state=None presumably makes the centroids differ from
# run to run, so all dumped data must be regenerated together - confirm.
kmeans_opts = {'init': 'k-means++',
                'max_iter': 300,
                'tol': 0.0001,
                'verbose': 0,
                'copy_x': True,
                'random_state': None,
                'algorithm': 'lloyd'}

n_clusters = 60

continuous_clusters = find_interpolation_points_via_kmeans(n_clusters, 
                                                           grid_benzene, 
                                                           rho_benzene, 
                                                           **kmeans_opts)

discrete_clusters, interpolation_indices = discretise_continuous_values(continuous_clusters, 
                                                                        grid_benzene)

# Check that no centroid values have been discretised to the same point
dup_indices = duplicate_indices(discrete_clusters)

if dup_indices:
    print("Removing duplicate centroids ", dup_indices)
    # axis=0 removes whole entries (rows). Without it np.delete flattens a
    # multi-dimensional array and would delete individual scalars instead,
    # leaving the points and the indices out of step.
    discrete_clusters = np.delete(discrete_clusters, dup_indices, axis=0)
    interpolation_indices = np.delete(interpolation_indices, dup_indices, axis=0)
    n_clusters = len(interpolation_indices)

# Dump centroid points and indices (first line of each file is the count)
np.savetxt(project_root / f'mocked_data/centroid_points_{n_clusters}.txt', 
           discrete_clusters, 
           comments='', 
           header=f'{n_clusters}')

np.savetxt(project_root / f'mocked_data/centroid_indices_{n_clusters}.txt', 
           interpolation_indices,
           fmt='%d',
           comments='', 
           header=f'{n_clusters}')


In [10]:
""" Density matrix of the batched wave functions (phi is stored here as `wfs`)
"""
n_states = n_occ
assert wfs.shape == (n_total, n_states), 'Shape of batched wfs is unexpected'

# Dump phi; the first line of the file holds the array dimensions
np.savetxt(project_root / 'mocked_data/phi.txt',
           wfs,
           header=f'{n_total} {n_states}',
           comments='')

# P_rr' = sum over states of phi_i(r) phi_i(r')
P_rr = np.matmul(wfs, wfs.T)
assert P_rr.shape == (n_total, n_total), 'Shape of density matrix P_rr is unexpected'

np.savetxt(project_root / 'mocked_data/P_rr.txt',
           P_rr,
           header=f'{n_total} {n_total}',
           comments='')


In [12]:
""" ZC^T

Two routes to the same matrix, checked against each other:
* Element-wise square of the density-matrix columns at the interpolation points
* Explicit product of Z with C^T, where C is Z sampled at the interpolation points
"""
# Columns of the density matrix at the interpolation points
P_r_mu = P_rr[:, interpolation_indices]
assert P_r_mu.shape == (n_total, n_clusters)
np.savetxt(project_root / 'mocked_data/P_r_mu.txt',
           P_r_mu,
           header=f'{n_total} {n_clusters}',
           comments='')

# The same quantity built directly from the wave functions
P_r_mu_alt = np.matmul(wfs, wfs[interpolation_indices].T)
assert np.allclose(P_r_mu, P_r_mu_alt)

# ZC^T as the element-wise product of P_r_mu with itself
zct = np.multiply(P_r_mu, P_r_mu)
assert zct.shape == (n_total, n_clusters)
np.savetxt(project_root / 'mocked_data/zct.txt',
           zct,
           header=f'{n_total} {n_clusters}',
           comments='')

# Cross-check: form Z and C explicitly and multiply them out
z = face_splitting_product(wfs)
c = z[interpolation_indices]
zct_alt = np.matmul(z, c.T)

assert np.allclose(zct, zct_alt)


In [13]:
""" CC^T and its (pseudo-)inverse
"""
# Density matrix restricted to interpolation points on both indices
P_mu_nu = P_r_mu[interpolation_indices]
assert P_mu_nu.shape == (n_clusters, n_clusters), 'Shape of density matrix P_mu_nu is unexpected'
np.savetxt(project_root / 'mocked_data/P_mu_nu.txt',
           P_mu_nu,
           header=f'{n_clusters} {n_clusters}',
           comments='')

# CC^T as the element-wise product of P_mu_nu with itself
cct = np.multiply(P_mu_nu, P_mu_nu)
assert cct.shape == (n_clusters, n_clusters), 'Shape of CC^T is unexpected'
np.savetxt(project_root / 'mocked_data/cct.txt',
           cct,
           header=f'{n_clusters} {n_clusters}',
           comments='')

# Cross-check: CC^T is also the interpolation-point rows of ZC^T
cct_alt = zct[interpolation_indices]
assert np.allclose(cct, cct_alt)

# Inverse of CC^T, evaluated as a Moore-Penrose pseudo-inverse
cct_inv = np.linalg.pinv(cct)
np.savetxt(project_root / 'mocked_data/cct_inv.txt',
           cct_inv,
           header=f'{n_clusters} {n_clusters}',
           comments='')



In [14]:
""" Interpolation vectors: Theta = (ZC^T) @ (CC^T)^{-1}
"""
from isdf_prototypes.isdf_vectors import construct_interpolation_vectors_matrix

# Contract ZC^T with the inverse of CC^T
theta = np.matmul(zct, cct_inv)
assert theta.shape == (n_total, n_clusters)

np.savetxt(project_root / 'mocked_data/theta.txt',
           theta,
           header=f'{n_total} {n_clusters}',
           comments='')

# The step-by-step result must agree with the existing single-call routine
theta_original = construct_interpolation_vectors_matrix(wfs, interpolation_indices)

assert np.allclose(theta, theta_original)


In [37]:
""" ISDF approximation to product functions

* Build the product basis on the interpolation points
* Expand it over the full grid with the interpolation vectors `theta`
* Print the cube grid details and write each approximate product to a cube file
"""
# Product basis defined on the interpolation grid
z_interp = face_splitting_product(wfs[interpolation_indices, :])
assert z_interp.shape == (n_clusters, n_product)

# ISDF approximation to the product basis
z_isdf = theta @ z_interp
assert z_isdf.shape == (n_total, n_product)

# Write to cube file
nx, ny, nz = points
cube_grid = cubegen.Cube(mol_benzene, nx, ny, nz)

# Required grid details
grid_values = cube_grid.get_coords()
# The flattened grid has z varying fastest and x slowest, so one step in x is
# ny * nz rows, one step in y is nz rows and one step in z is a single row.
# Deriving the offsets from `points` keeps the spacings right for any grid
# size (the previous literal indices only held for a 10 x 10 x 10 grid).
print("dx: ", grid_values[ny * nz, 0] - grid_values[0, 0])
print("dy: ", grid_values[nz, 1] - grid_values[0, 1])
print("dz: ", grid_values[1, 2] - grid_values[0, 2])
print("Box lengths (excluding resolution?)")
print(cube_grid.box)
print("Origin (== grid[0, :])", cube_grid.boxorig)


# One cube file per product function; file names are 1-indexed
for ij in range(n_product):
    fname = project_root / f'mocked_data/z_isdf/z_isdf_{ij+1}.cube'
    cube_grid.write(field=z_isdf[:, ij].reshape(nx, ny, nz), fname=fname.as_posix())

dx:  1.711393355849685
dy:  1.57169640193906
dz:  0.6919197067779379
Box lengths (excluding resolution?)
[[15.4025402   0.          0.        ]
 [ 0.         14.14526762  0.        ]
 [ 0.          0.          6.22727736]]
Origin (== grid[0, :] [-7.62678655 -7.07273774  4.45198379]


In [20]:
"""Exact Product Functions

* Build the exact pair products on the full grid and write them to cube files
* Report the per-product error of the ISDF approximation `z_isdf`
* Dump `z_isdf` to `mocked_data`
"""
from isdf_prototypes.math_ops import face_splitting_product

# Exact product states
z_exact = face_splitting_product(wfs)
assert z_exact.shape == (n_total, n_occ**2)

# Write exact product states to cube file
nx, ny, nz = points
cube_grid = cubegen.Cube(mol_benzene, nx, ny, nz)
for ij in range(n_product):
    fname = project_root / f'mocked_data/z_isdf/z_exact_{ij+1}.cube'
    # Write the exact field: previously z_isdf was written here, so the
    # "exact" cube files were copies of the ISDF approximation.
    cube_grid.write(field=z_exact[:, ij].reshape(nx, ny, nz), fname=fname.as_posix())

# Quantify error w.r.t. ISDF functions
mse_product_err = np.empty(shape=n_product)
print("State, mean error over all grid points:")

for ij in range(z_isdf.shape[1]):
    mse_product_err[ij] = mean_squared_error_regular_grid(z_exact[:, ij], z_isdf[:, ij])
    print(ij, mse_product_err[ij])

# NOTE(review): the label says "relative error", but the helper name suggests
# these are mean squared errors - confirm and relabel if so.
print("Min, Mean, Max relative error:", np.amin(mse_product_err), np.mean(mse_product_err), np.amax(mse_product_err))

# First line of the file holds the array dimensions
np.savetxt(project_root / f'mocked_data/z_isdf.txt', 
           z_isdf, 
           comments='', 
           header=f'{n_total} {n_product}')   


State, mean error over all grid points:
0 6.228987121881255e-09
1 5.6424581100907835e-09
2 9.900521675658914e-09
3 5.118618266225407e-09
4 7.472500497335831e-09
5 7.584397866996941e-09
6 2.361011911604382e-08
7 1.7169136487247196e-08
8 3.6823327098729346e-08
9 3.239367359015613e-08
10 1.95325937701124e-08
11 2.360031265410456e-09
12 4.7978771914847284e-08
13 5.618114584600674e-10
14 1.2991609485848584e-09
15 2.0579063706107082e-08
16 2.439774970269758e-07
17 2.335595486054997e-08
18 3.636769479423473e-08
19 3.304498308306335e-08
20 5.885468816261369e-07
21 5.6424581100907835e-09
22 5.029186426726719e-09
23 4.925347792683167e-09
24 6.3260518729580876e-09
25 3.979965538914459e-09
26 5.2714693523664905e-09
27 4.33581282225368e-09
28 2.8710320265266114e-09
29 6.32666197960782e-09
30 1.1722198290924283e-08
31 6.650969310855186e-09
32 1.582150163099592e-09
33 2.118089793373733e-08
34 2.8542675546550643e-09
35 1.3781275201552765e-09
36 3.1294473920802736e-09
37 1.7352450104656548e-07
38 1.069

In [21]:
""" Compare Fortran and Python quantities

Each quantity is loaded from the Python dump (`mocked_data`) and from the
Fortran dump (`fortran_data`), then compared with `np.allclose`. The first line
of every file is skipped because it holds the array dimensions.

Observations:
* Whenever the mocked data is regenerated, the Fortran code must be re-run too.
* After one such re-run, theta went from disagreeing to agreeing, which was unexpected.
  This suggests that either a) PYSCF or b) numpy is using threading, and that it
  affects the numerical comparison.
* Current guess: the Fortran problem is in the inversion.
"""
py_data_path = project_root / "mocked_data"
f90_data_path = project_root / "fortran_data"

# Full density matrix
ref_P_rr = np.loadtxt(py_data_path / "P_rr.txt", skiprows=1)
f90_P_rr = np.loadtxt(f90_data_path / "f90_P_rr_alt.txt", skiprows=1)
print("P_rr agreement", np.allclose(f90_P_rr, ref_P_rr))

# Density matrix columns at the interpolation points
ref_P_r_mu = np.loadtxt(py_data_path / "P_r_mu.txt", skiprows=1)
f90_P_r_mu = np.loadtxt(f90_data_path / "f90_P_r_mu.txt", skiprows=1)
print("P_r_mu agreement", np.allclose(f90_P_r_mu, ref_P_r_mu))

# Density matrix restricted to interpolation points on both indices
ref_P_mu_nu = np.loadtxt(py_data_path / "P_mu_nu.txt", skiprows=1)
f90_P_mu_nu = np.loadtxt(f90_data_path / "f90_P_mu_nu.txt", skiprows=1)
print("P_mu_nu agreement", np.allclose(f90_P_mu_nu, ref_P_mu_nu))

# ZC^T
ref_zct = np.loadtxt(py_data_path / "zct.txt", skiprows=1)
f90_zct = np.loadtxt(f90_data_path / "f90_zct.txt", skiprows=1)
print("ZC^T agreement", np.allclose(f90_zct, ref_zct))

# CC^T: the Fortran side dumps two files, both checked against the same reference
ref_cct = np.loadtxt(py_data_path / "cct.txt", skiprows=1)
f90_cct = np.loadtxt(f90_data_path / "f90_cct.txt", skiprows=1)
f90_cct_alt = np.loadtxt(f90_data_path / "f90_cct_alt.txt", skiprows=1)
print("CC^T agreement", np.allclose(f90_cct, ref_cct))
print("CC^T agreement with alt construction", np.allclose(f90_cct_alt, ref_cct))


# Inverse of CC^T
ref_cct_inv = np.loadtxt(py_data_path / "cct_inv.txt", skiprows=1)
f90_cct_inv = np.loadtxt(f90_data_path / "f90_cct_inv.txt", skiprows=1)
print("INV(CC^T) agreement", np.allclose(f90_cct_inv, ref_cct_inv))

# Isolate the inversion step: invert the Fortran CC^T with numpy and compare to both.
# Note that this uses np.linalg.inv, whereas the reference cct_inv.txt was written
# from np.linalg.pinv(cct) in the CC^T cell.
np_cct_inv = np.linalg.inv(f90_cct)
print("Agreement between np inverse of parsed CC^T, and ref (CC^T)^{-1}", np.allclose(np_cct_inv, ref_cct_inv))
print("Agreement between np inverse of parsed CC^T, and fortran (CC^T)^{-1}", np.allclose(np_cct_inv, f90_cct_inv))

# Disabled debugging aid: element-by-element listing of the two inverses, flagging
# absolute differences above 1.e-8 (the 60 is hard-coded for the cluster count)
# for i in range(60):
#     for j in range(60):
#         print(i, j, ref_cct_inv[i, j], f90_cct_inv[i, j], np.abs(ref_cct_inv[i, j] - f90_cct_inv[i, j]) > 1.e-8)

# Interpolation vectors
ref_theta = np.loadtxt(py_data_path / "theta.txt", skiprows=1)
f90_theta = np.loadtxt(f90_data_path / "f90_theta.txt", skiprows=1)
print("Theta agreement", np.allclose(f90_theta, ref_theta))

# ISDF approximation to the product functions
ref_z_isdf = np.loadtxt(py_data_path / "z_isdf.txt", skiprows=1)
f90_z_isdf = np.loadtxt(f90_data_path / "f90_z_isdf.txt", skiprows=1)
print("Z_isdf agreement", np.allclose(f90_z_isdf, ref_z_isdf))


P_rr agreement True
P_r_mu agreement True
P_mu_nu agreement True
ZC^T agreement True
CC^T agreement True
CC^T agreement with alt construction True
INV(CC^T) agreement False
Agreement between np inverse of parsed CC^T, and ref (CC^T)^{-1} True
Agreement between np inverse of parsed CC^T, and fortran (CC^T)^{-1} False
Theta agreement True
Z_isdf agreement True


In [None]:
#