In [1]:
import numpy as np
from core import Psi4Model, XYZLogger, MoleculeString #, load_xyz
from scipy.io import loadmat
import random

# Centered QM7 database (Z and R arrays; later cells treat zero entries of Z
# as empty slots and drop them).
Z = np.load(file='../data/molecule_geometries/qm7_Z.npy')
R = np.load(file='../data/molecule_geometries/qm7_R_centered.npy')

# Hand-picked molecules, stored in this order:
# H2O, CO2, HCN, Acetonitrile, H2SO4, Benzene, Diborane
numbers = np.load(file='../data/molecule_geometries/nuclei_numbers.npy')
positions = np.load(file='../data/molecule_geometries/positions.npy')

# Load the already existing optimized geometries and their nuclei numbers.
# Both are converted to plain Python lists so that later cells can append
# new results to them. allow_pickle=True is needed for object arrays; only
# load files you trust with it.
geometries = np.load(
    file='../data/molecule_geometries/optimized_geometries.npy',
    allow_pickle=True,
).tolist()
nuclei_numbers = np.load(
    file='../data/molecule_geometries/optimized_geometries_nuclei.npy',
    allow_pickle=True,
).tolist()

In [39]:
# Psi4 wrapper shared by the geometry-optimization cells of this notebook.
model = Psi4Model(
    method='b3lyp',
    basis='aug-cc-pvtz',
    psi4_output='../data/psi4_output/optimizing_output.dat',
    cores=3,
    memory=4e+09,  # presumably bytes (~4 GB) -- TODO confirm against Psi4Model
)

Psi4 Model initiated with method b3lyp and basis set aug-cc-pvtz


In [58]:
# Optimize one of the hand-picked molecules and append the result to the
# running lists. Index 4 is H2SO4 in the molecule order listed in the
# loading cell.
# NOTE: re-running this cell appends the same molecule a second time.
idx = 4
# Entries equal to 0 are dropped (presumably padding up to a fixed width).
# The same mask selects both the atomic numbers and the coordinates, so the
# two stay aligned even if the zero entries are not all at the end. A plain
# prefix slice of the coordinates would only be right for trailing padding.
_mask = numbers[idx] != 0
_Z = np.array(numbers[idx][_mask], dtype=int)
_R = np.array(positions[idx])[_mask]
energy, geometry = model.compute(_Z, _R, optimize=True)
model.clean()
nuclei_numbers.append(_Z)
geometries.append(geometry)

In [57]:
# Optimize one molecule from the QM7 arrays and append the result to the
# running lists.
# NOTE: re-running this cell appends the same molecule a second time.
idx = 120
# Entries equal to 0 are dropped (presumably padding up to a fixed width).
# The same mask selects both the atomic numbers and the coordinates, so the
# two stay aligned even if the zero entries are not all at the end. A plain
# prefix slice of the coordinates would only be right for trailing padding.
_mask = Z[idx] != 0
_Z = np.array(Z[idx][_mask], dtype=int)
_R = np.array(R[idx])[_mask]
energy, geometry = model.compute(_Z, _R, optimize=True)
model.clean()
nuclei_numbers.append(_Z)
geometries.append(geometry)

In [44]:
def _as_object_array(items):
    """Pack a sequence of per-molecule arrays into a 1-D ``dtype=object`` array.

    The molecules have different atom counts, so the sequence is ragged.
    ``np.array(ragged)`` without ``dtype=object`` raises ``ValueError`` on
    NumPy >= 1.24 (and was deprecated from 1.19). Filling a pre-allocated
    object array element by element always gives one slot per molecule,
    even when every molecule happens to have the same shape.

    Parameters
    ----------
    items : sequence of array-like
        One entry per molecule.

    Returns
    -------
    numpy.ndarray
        1-D object array of length ``len(items)``; each element is an ndarray.
    """
    packed = np.empty(len(items), dtype=object)
    for slot, item in enumerate(items):
        packed[slot] = np.asarray(item)
    return packed


# Persist the optimized geometries and their nuclei numbers. The files hold
# object arrays, so they must be read back with allow_pickle=True.
np.save('../data/molecule_geometries/optimized_geometries', _as_object_array(geometries))
np.save('../data/molecule_geometries/optimized_geometries_nuclei', _as_object_array(nuclei_numbers))

In [63]:
# Read the saved results back for inspection. Unlike the initial loading
# cell there is no .tolist() here, so both names are now object ndarrays
# and the .append() calls of the optimization cells no longer apply to them.
geometries = np.load(
    file='../data/molecule_geometries/optimized_geometries.npy',
    allow_pickle=True,
)
nuclei_numbers = np.load(
    file='../data/molecule_geometries/optimized_geometries_nuclei.npy',
    allow_pickle=True,
)

In [65]:
# Display the reloaded nuclei numbers (an object array holding one integer
# array per stored molecule).
nuclei_numbers

array([array([8, 8, 6]), array([6, 6, 1, 1, 1, 1]),
       array([16,  6,  6,  6,  6,  1,  1,  1,  1]),
       array([16,  8,  8,  8,  8,  1,  1])], dtype=object)

## Generating the final dataset

In [2]:
def sample_unique_indices(n_total, n_samples, seed=None):
    """Draw ``n_samples`` distinct indices from ``range(n_total)``, sorted ascending.

    Uses ``random.sample`` (sampling without replacement) instead of a
    draw-and-retry loop, and a private ``random.Random`` instance so the
    global random state is left untouched.

    Parameters
    ----------
    n_total : int
        Size of the index range to sample from (indices ``0 .. n_total - 1``).
    n_samples : int
        Number of distinct indices to draw.
    seed : int or None, optional
        Seed for the generator. The same seed always yields the same indices;
        ``None`` seeds from the operating system (non-reproducible).

    Returns
    -------
    numpy.ndarray
        1-D integer array of length ``n_samples``, strictly increasing.

    Raises
    ------
    ValueError
        If ``n_samples`` is negative or larger than ``n_total``
        (raised by ``random.sample``).
    """
    rng = random.Random(seed)
    picked = rng.sample(range(n_total), n_samples)
    return np.sort(np.array(picked, dtype=int))


# Sampling range 0..7164, i.e. 7165 candidate molecules; this must match the
# number of entries in the QM7 arrays loaded at the top of the notebook.
N_QM7_MOLECULES = 7165
N_BENCHMARK = 500
# Fixed seed so that Restart & Run All regenerates the same benchmark subset.
BENCHMARK_SEED = 0

dataset_idxs = sample_unique_indices(N_QM7_MOLECULES, N_BENCHMARK, seed=BENCHMARK_SEED)

In [6]:
# Pick the sampled molecules along the first axis; the remaining axes are
# taken whole (Z is 2-D and R is 3-D, as the shape checks below show).
Z_dataset = Z[dataset_idxs, :]
R_dataset = R[dataset_idxs, :, :]

In [7]:
# Sanity check: one row per sampled molecule.
Z_dataset.shape

(500, 23)

In [8]:
# Sanity check: same leading dimensions as Z_dataset, plus a trailing axis of 3.
R_dataset.shape

(500, 23, 3)

In [16]:
# Write the benchmark subset to disk, together with the indices it was
# drawn from so the subset can be traced back to the full arrays.
np.save(file='../data/molecule_geometries/benchmarking_dataset_R', arr=R_dataset)
np.save(file='../data/molecule_geometries/benchmarking_dataset_Z', arr=Z_dataset)
np.save(file='../data/molecule_geometries/benchmark_idxs', arr=dataset_idxs)