In [7]:
import numpy as np

import rdkit
import rdkit.Chem.AllChem
from rdkit import Chem
from rdkit import DataStructs

from molecule_builder import build_molecules

In [8]:
# `build_molecules` returns an iterator over the action space of a starting
# molecule, i.e. the molecules that can be built from it. Start from a single
# carbon ('C') and show every candidate as a SMILES string.

start = Chem.MolFromSmiles('C')
list(map(Chem.MolToSmiles, build_molecules(start)))

['C#C', 'C=C', 'CC', 'C=O', 'CO', 'C=N', 'CN', 'C#N']

In [9]:
def build_nested(mol, num):
    """ Yield the molecules reached from `mol` after `num` nested build steps.

    Simulates tree search / RL by recursively calling `build_molecules` on
    its own output a desired number of times. The traversal is lazy and
    depth-first, so pulling a single deep molecule with `next(...)` is fast.
    Note, this is NOT the most efficient way to iterate over sets of
    molecules at higher depths: nothing is de-duplicated here, so for that
    we should de-duplicate after each iteration.

    Args:
        mol: Starting molecule, passed unchanged to `build_molecules`.
        num: Number of nested `build_molecules` applications; must be >= 1.

    Yields:
        Each molecule produced by `build_molecules` at depth `num`.

    Raises:
        ValueError: If `num` is less than 1. Because this is a generator,
            the error surfaces on first iteration, not at call time.
    """

    # Without this guard a `num` below 1 never hits the `num == 1` base case:
    # the counter just keeps decreasing and the recursion runs away.
    if num < 1:
        raise ValueError('num must be >= 1, got {!r}'.format(num))

    # `child` (rather than re-using `mol`) keeps the parameter unshadowed.
    for child in build_molecules(mol):
        if num == 1:
            yield child
        else:
            yield from build_nested(child, num=num - 1)

In [10]:
# Grab one molecule at the desired depth. `next` takes the first molecule the
# depth-first generator produces, so this is only a "random" molecule if
# `build_molecules` itself yields in random order (not visible here).
Chem.MolToSmiles(next(build_nested(start, num=12)))

'C=NON(N=O)C(=O)OOOC#N'

In [11]:
# The tree grows quickly: count the distinct SMILES strings at depths 1 to 4.
[
    len(set(map(Chem.MolToSmiles, build_nested(start, num=depth))))
    for depth in range(1, 5)
]

[8, 38, 234, 1383]

In [12]:
def get_fingerprint(mol, radius=2, fp_length=128):
    """ Calculate a Morgan fingerprint for `mol` as a numpy array.

    Handy when we want to work with fingerprints during development.

    Args:
        mol: RDKit molecule to fingerprint.
        radius: Morgan radius passed to RDKit (default 2).
        fp_length: Number of bits in the fingerprint, which is also the
            length of the returned array (default 128).

    Returns:
        A 1-D numpy array of length `fp_length` (numpy's default float
        dtype) filled from the fingerprint bit vector.
    """

    # Pass the bit count by keyword so it cannot be mistaken for one of the
    # other optional arguments of the RDKit call.
    # NOTE(review): newer RDKit releases reportedly deprecate this function in
    # favour of `rdFingerprintGenerator` - confirm before upgrading RDKit.
    bit_vect = rdkit.Chem.AllChem.GetMorganFingerprintAsBitVect(
        mol, radius, nBits=fp_length)

    # `DataStructs` is imported explicitly at the top of the notebook instead
    # of relying on `rdkit.Chem` having loaded it as a side effect.
    bits = np.zeros((fp_length,))
    DataStructs.ConvertToNumpyArray(bit_vect, bits)
    return bits


# Fingerprint the first depth-12 molecule from the depth-first enumeration
# (the same construction used for the SMILES example above).
mol = next(build_nested(start, num=12))
get_fingerprint(mol)

array([0., 1., 0., 0., 1., 0., 0., 0., 0., 1., 1., 0., 0., 0., 1., 0., 1.,
       0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1., 0., 0.,
       1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.,
       1., 0., 1., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.,
       1., 0., 0., 1., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 0., 0., 0.,
       0., 1., 0., 0., 1., 0., 1., 1., 0.])