In [1]:
import sys
sys.path.append('..')
#WORKS ON TORCH==1.4.1, schnetpack == 0.3
from aimnet import load_AIMNetMT_ens, load_AIMNetSMD_ens
import ase
import ase.optimize
import ase.md
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


### Load pre-trained ensembles of AIMNet models

load_AIMNetMT_ens - loads a model trained to wB97x/def2-TZVPP energies, atomic electric moments (charges, dipoles, etc) and volumes

load_AIMNetSMD_ens - loads a model obtained by transfer learning towards SMD-wB97x/def2-TZVPP energies

In [4]:
import torch
import schnetpack as spk
from schnetpack.datasets import QM9

# Run a pretrained AIMNet model over a slice of the QM9 dataset and collect
# the embeddings it produces. SchNetPack is used to read the QM9 database and
# to convert each stored molecule into input tensors for the network.
#
# Configuration:
#     QM9_FILEPATH    - path of the QM9 database file (qm9.db)
#     QM9_RANGE       - [start, stop] molecule indices; used as range(start, stop),
#                       so `stop` itself is excluded
#     AFV_N_FEATURES  - number of features of the atomic feature vectors (AFV)
#                       of the pretrained AIMNet model
#     AEF_N_FEATURES  - number of features of the atomic environment vectors (AEF)
#                       of the pretrained AIMNet model
#
# Results (produced further down in this cell):
#     afv_embs        - numpy array with AFV_N_FEATURES columns holding the AFV embeddings
#     aef_embs        - numpy array with AEF_N_FEATURES columns holding the AEF embeddings

QM9_FILEPATH = '../../data/datasets/QM9/qm9.db'
QM9_RANGE = [0, 10000]
AFV_N_FEATURES = 16
AEF_N_FEATURES = 256

# Backward-compatible alias: the constant was originally introduced under this
# misspelled name, and existing code still refers to it.
AEF_N_FEATUERS = AEF_N_FEATURES


# Open the QM9 database through SchNetPack's QM9 dataset class
# (no download is attempted; uncharacterized molecules are removed).
qm9_data = QM9(QM9_FILEPATH, download=False, remove_uncharacterized=True)

# Maps each supported atomic number to the integer index handed to the model.
atomic_numbers_to_idx = {
    1: 0,   # H
    6: 1,   # C
    7: 2,   # N
    8: 3,   # O
    16: 4,  # S
    9: 5,   # F
    17: 6,  # Cl
}

# Loop-invariant objects: build them once instead of once per molecule.
# The converter turns an ASE Atoms object into the dict of tensors used below.
converter = spk.data.AtomsConverter(device='cpu')
# Pretrained AIMNet ensemble.
model_gas = load_AIMNetMT_ens()

# Per-molecule embedding blocks are collected in lists and stacked once after
# the loop; stacking onto a growing array inside the loop re-copies every
# previously stored row on each iteration.
afv_chunks = []
aef_chunks = []

# Run through the requested range of molecules in QM9.
for molecule_i in range(QM9_RANGE[0], QM9_RANGE[1]):

    # Simple progress indicator.
    if molecule_i % 1000 == 0:
        print(molecule_i)

    # Load the molecule; the accompanying properties dict is not needed here.
    atoms, _ = qm9_data.get_properties(molecule_i)

    # Convert the ASE molecule into network-ready input tensors.
    inputs = converter(atoms)

    # Translate the atomic numbers of the first (only) entry into model indices.
    # An element missing from atomic_numbers_to_idx raises KeyError.
    atomic_numbers = inputs['_atomic_numbers'].detach().numpy()
    atomic_numbers_in_idx = torch.tensor(
        [[atomic_numbers_to_idx[int(number)] for number in atomic_numbers[0]]]
    )

    # The model returns its prediction together with the atomic feature vectors
    # (afv) and the atomic environment vectors (aef).
    pred, afv, aef = model_gas(inputs['_positions'], atomic_numbers_in_idx)

    # Index 0 drops the leading axis of each output
    # (presumably a batch axis of size 1 -- TODO confirm against the model).
    afv_chunks.append(afv.detach().numpy()[0])
    aef_chunks.append(aef.detach().numpy()[0])

# Stack everything once. The zero-row seed arrays fix the column count and the
# float64 result dtype, and keep the stack valid when the range is empty, but
# they contribute no rows: the previous one-row np.zeros seed left a spurious
# all-zero first row in both embedding matrices.
afv_embs = np.vstack([np.zeros((0, AFV_N_FEATURES)), *afv_chunks])
aef_embs = np.vstack([np.zeros((0, AEF_N_FEATUERS)), *aef_chunks])



0


OperationalError: unable to open database file

In [3]:
# Write each embedding matrix to its own comma-separated text file.
np.savetxt(fname='../../afv.csv', X=afv_embs, delimiter=',')
np.savetxt(fname='../../aef.csv', X=aef_embs, delimiter=',')