In [13]:
import numpy as np
import tensorflow as tf
# Turn on memory growth for every visible GPU so TensorFlow allocates device
# memory as needed.  The setting currently has to be the same across all
# GPUs, hence it is applied to each one; with no GPU present the loop body
# simply never runs.
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)
import nfp

In [2]:
# Load the saved redox model for inference.  compile=False loads the model
# without compiling it, which is sufficient for calling predict().
redox_model = tf.keras.models.load_model(
    filepath='/projects/rlmolecule/pstjohn/models/20210107_redox_model/',
    compile=False,
)

In [6]:
def atom_featurizer(atom):
    """Describe an atom as a string token.

    The token is the ``str()`` of a tuple holding, in order: element symbol,
    number of radical electrons, formal charge, chiral tag name, aromaticity
    flag, ring size as reported by ``nfp.get_ring_size`` (``max_size=6``),
    degree, and total hydrogen count (``includeNeighbors=True``).

    Args:
        atom: atom object exposing the RDKit-style getters used below.

    Returns:
        str: the stringified feature tuple (a string, not an integer).
    """
    features = (
        atom.GetSymbol(),
        atom.GetNumRadicalElectrons(),
        atom.GetFormalCharge(),
        atom.GetChiralTag().name,
        atom.GetIsAromatic(),
        nfp.get_ring_size(atom, max_size=6),
        atom.GetDegree(),
        atom.GetTotalNumHs(includeNeighbors=True),
    )
    return str(features)


def bond_featurizer(bond, flipped=False):
    """Describe a bond as a string token.

    The token has the form ``"<A>-<B> <bond type> <ring tag> <stereo name>"``
    with the four parts joined by single spaces and the result stripped.
    ``<A>-<B>`` are the begin/end atom symbols (end/begin when ``flipped``),
    and the ring tag is ``R<size>`` (``nfp.get_ring_size`` with
    ``max_size=6``) for ring bonds and the empty string otherwise.

    An empty ring tag still takes part in the join, so non-ring bonds contain
    two consecutive spaces.  Keep this exact format: the preprocessor state
    restored from ``preprocessor.json`` presumably keys on these strings
    (TODO confirm).

    Args:
        bond: bond object exposing the RDKit-style getters used below.
        flipped: when true, report the atom symbols in end-begin order.

    Returns:
        str: the bond token.
    """
    first, second = bond.GetBeginAtom(), bond.GetEndAtom()
    if flipped:
        first, second = second, first
    atoms = f"{first.GetSymbol()}-{second.GetSymbol()}"

    if bond.IsInRing():
        ring = f"R{nfp.get_ring_size(bond, max_size=6)}"
    else:
        ring = ''

    btype = str(bond.GetBondType())
    bstereo = bond.GetStereo().name

    return " ".join((atoms, btype, ring, bstereo)).strip()


# Build the SMILES preprocessor around the two featurizers defined in this
# cell, then restore its saved state from the JSON file stored next to the
# model (presumably the feature vocabulary from training -- TODO confirm).
preprocessor = nfp.SmilesPreprocessor(
    atom_features=atom_featurizer,
    bond_features=bond_featurizer,
    explicit_hs=False,
)
preprocessor.from_json(
    '/projects/rlmolecule/pstjohn/models/20210107_redox_model/preprocessor.json')

In [20]:
def pred_redox(smiles):
    """Run the redox model on a single SMILES string.

    The molecule is featurized with the module-level ``preprocessor``
    (``train=False``), every feature array gets a leading axis of size one,
    and the resulting dict of tensors is passed to ``redox_model.predict``.

    Args:
        smiles: SMILES string of the molecule to score.

    Returns:
        The model output flattened to one dimension.  NOTE(review): the
        sample call in this notebook yields two values that match the ``ie``
        and ``ea`` columns of the GNG table -- confirm the output order.
    """
    features = preprocessor.construct_feature_matrices(smiles, train=False)

    batched_inputs = {}
    for name, values in features.items():
        # Add the leading axis so the single molecule forms a batch of one.
        batched_inputs[name] = tf.constant(np.expand_dims(values, 0))

    return redox_model.predict(batched_inputs).flatten()

In [21]:
# Spot-check one radical; the same SMILES is looked up in the GNG table further down.
pred_redox('CC(=S)C1=C(C)[CH]CCC1(C)C')

array([ 0.71126354, -0.49478754], dtype=float32)

In [24]:
import pandas as pd
# Read the GNG radicals table from the .csv.gz file, using its first column as the index.
gng = pd.read_csv('20210109_gng_radicals.csv.gz', index_col=0)

In [26]:
# Show the stored row(s) for the radical scored earlier, for comparison with the model output.
gng.loc[gng['smiles'] == 'CC(=S)C1=C(C)[CH]CCC1(C)C']

Unnamed: 0,smiles,time,real_reward,atom_type,buried_vol,max_spin,atom_index,ie,ea,bde
823300,CC(=S)C1=C(C)[CH]CCC1(C)C,2021-01-08 21:20:03.435049,181.28307,C,71.99054,0.332517,3,0.711264,-0.494788,79.90403
