# Import packages

In [21]:
from rdkit import Chem
from mol_weld.weld import weld_r_groups

# Define core and substituent SMILES
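To denote an R-group position in the core SMILES string, use the placeholder `[*:x]`, where `x` is the R-group number. The same placeholder marks the attachment point in the substituent SMILES, so the substituent for position `x` must carry `[*:x]` as well. For example, to add an H atom, give the SMILES of H-A (where A is the placeholder), e.g. `[H][*:1]` for position 1.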

If a substituent consists of several separately attached groups (for example three methyl groups), or of a single group that is attached through more than one bond, use one placeholder per bond to the core.

For example:
* `O([*:4])[*:4]` for A-O-A (one oxygen attached through two bonds)
* `C[*:1].C[*:1].C[*:1]` for adding three methyl groups, C-A.C-A.C-A

\*A dot (`.`) separates disconnected fragments within a single SMILES string.

In [38]:
# Worked example: assembling paracetamol.
# The core is a Kekulé-form benzene ring with two labelled attachment
# points, [*:1] and [*:2], on opposite (para) ring positions.
smiles = "C1([*:1])=CC=C([*:2])C=C1"

# Substituents, each carrying the label of the core position it joins:
# R1 is a hydroxyl group, R2 an N-linked acetamide group.
R1, R2 = "O[*:1]", "[*:2]NC(=O)C"


In [39]:
# Turn the core SMILES into an RDKit molecule.
# Chem.MolFromSmiles returns None (it does not raise) when the SMILES cannot
# be parsed, so fail here with a message that names the offending string
# instead of letting None reach Kekulize.
core = Chem.MolFromSmiles(smiles)
if core is None:
    raise ValueError('Could not parse core SMILES: %r' % (smiles,))
# make aromatic bonds explicit
Chem.Kekulize(core, clearAromaticFlags=True)

# Do the same for the r-groups and collect them in a single RDKit mol object.
# To use more substituents, extend the tuple below (e.g. (R1, R2, R3)).
rgroups = None
for rgroup_smiles in (R1, R2):
    mol = Chem.MolFromSmiles(rgroup_smiles)
    if mol is None:
        raise ValueError('Could not parse R-group SMILES: %r' % (rgroup_smiles,))
    # The first r-group starts the collection; later ones are appended as
    # additional disconnected fragments.
    rgroups = mol if rgroups is None else Chem.CombineMols(rgroups, mol)

Chem.Kekulize(rgroups, clearAromaticFlags=True)

In [40]:
# Join the combined r-group fragments onto the core. Presumably each
# substituent is attached at the core placeholder with the same [*:x]
# label -- confirm against the mol_weld documentation.
welded_mol = weld_r_groups(core, rgroups)
# Bare last expression: the notebook displays the product's SMILES string
# as the cell output.
Chem.MolToSmiles(welded_mol)

'CC(=O)Nc1ccc(O)cc1'