# Building Blocks

> Functions for converting building blocks into synthons and fusing synthons with reaction templates

In [None]:
#| default_exp building_blocks

In [None]:
#| hide
from nbdev.showdoc import *
%load_ext autoreload
%autoreload 2

In [None]:
#| export

from __future__ import annotations
import chem_templates
from chem_templates.imports import *
from chem_templates.utils import *
from chem_templates.chem import Molecule, to_mol, to_smile

from rdkit import Chem
from rdkit.Chem import rdChemReactions as Reactions

from chem_templates.synt_on.src.SyntOn_BBs import mainSynthonsGenerator
from chem_templates.synt_on.src.SyntOn_Classifier import BBClassifier

In [None]:
#| export

def smile_to_synthon(smile: str, # smiles string to convert
                     keep_pg: bool=False # if True, results include synthons with un-removed protecting groups
            ) -> Tuple[list[str], list[list[str]]]: # Returns paired list of SMILES and reaction classes
    '''
    Breaks a building block SMILES into synthon SMILES and their reaction classes

    The two returned lists are index-aligned: the second list's entry at
    position `i` holds the reaction classes reported for the SMILES at
    position `i` of the first list
    '''
    bb_classes = BBClassifier(mol=to_mol(smile))

    # with `returnBoolAndDict=True` the generator hands back a pair; only the
    # mapping of synthon SMILES -> reaction classes is used, the first item is dropped
    _, synthon_map = mainSynthonsGenerator(smile, keep_pg, bb_classes, returnBoolAndDict=True)

    smiles = []
    rxns = []
    for synthon_smile, rxn_classes in synthon_map.items():
        smiles.append(synthon_smile)
        rxns.append(list(rxn_classes))
    return smiles, rxns

In [None]:
# Regression check: with the default `keep_pg=False`, the example building block
# is expected to yield exactly these two synthon SMILES, in this order
assert smile_to_synthon('COC(=O)c1ccnc(CNC(=O)NCC2(C)CC(N=C=O)CC(C)(C)C2)c1')[0] == [
    'CC1(C)CC(N[CH:10]=O)CC(C)(CNC(=O)NCc2cc(C(=O)O)ccn2)C1',
    'CC1(C)CC(N[CH:10]=O)CC(C)(CNC(=O)NCc2cc([CH:10]=O)ccn2)C1']

In [None]:
#| export

def get_synthon_marks(smile: str # input synthon smiles string
                     ) -> list[str]: # list of marks
    '''
    extracts reaction tag marks from synthon
    
    ie `'CC1(C)CC(N[CH:10]=O)CC(C)(CNC(=O)NCc2cc([CH:10]=O)ccn2)C1' -> ['C:10']`

    A mark is the first character inside the bracket atom joined by `:` to
    the complete label after the colon, so the label may have any number of
    characters. Repeated marks are collapsed with `deduplicate_list`.
    '''
    # raw string so `\[` and `\w` reach the regex engine untouched; group 1 is
    # the first character of the atom, group 2 the whole label after the colon
    pat = re.compile(r"\[(\w)\w*:(\w+)\]")
    current_marks = [f"{m.group(1)}:{m.group(2)}" for m in pat.finditer(smile)]
    return deduplicate_list(current_marks)

In [None]:
# Both `[CH:10]` atoms in the example carry the same mark, so a single 'C:10' entry is expected
assert get_synthon_marks('CC1(C)CC(N[CH:10]=O)CC(C)(CNC(=O)NCc2cc([CH:10]=O)ccn2)C1') == ['C:10']

In [None]:
#| export

# Maps each synthon mark (`<atom symbol>:<label>`) to the marks it may be
# paired with. Every pairing is listed from both sides, i.e. if `a` lists `b`
# then `b` lists `a`.
SYNTHON_VALID_COMBINATIONS = {
    'C:10': ['N:20', 'O:20', 'C:20', 'c:20', 'n:20', 'S:20'],
    'c:10': ['N:20', 'O:20', 'C:20', 'c:20', 'n:20', 'S:20'],
    'c:20': ['N:11', 'C:10', 'c:10'],
    'C:20': ['C:10', 'c:10'],
    'c:21': ['N:20', 'O:20', 'n:20'],
    'C:21': ['N:20', 'n:20'],
    'N:20': ['C:10', 'c:10', 'C:21', 'c:21', 'S:10'],
    'N:11': ['c:20'],
    'n:20': ['C:10', 'c:10', 'C:21', 'c:21'],
    'O:20': ['C:10', 'c:10', 'c:21'],
    'S:20': ['C:10', 'c:10'],
    'S:10': ['N:20'],
    'C:30': ['C:40', 'N:40'],
    'C:40': ['C:30'],
    'C:50': ['C:50'],
    'C:70': ['C:60', 'c:60'],
    'c:60': ['C:70'],
    'C:60': ['C:70'],
    'N:40': ['C:30'],
}

In [None]:
#| export

def add_reconstruction_atoms(smile: str # synthon smiles string
                            ) -> str: # synthon reconstruction string
    '''
    augments synthon annotations (ie c:10) with dummy atoms for fusion

    For every atom with a non-zero atom map number, attached hydrogens are
    turned into a placeholder element selected by that map number
    '''
    map_labels = [10, 20, 30, 40, 50, 60, 70, 21, 11] # annotation numbers
    dummy_elements = [23, 74, 72, 104, 105, 106, 107, 108, 109] # dummy atoms, index-aligned with `map_labels`
    double_bond_dummies = [72, 104, 105] # dummies that take the place of two hydrogens

    # hydrogens must be explicit atoms before they can be swapped
    mol = Chem.AddHs(to_mol(smile))

    for atom in mol.GetAtoms():
        map_num = atom.GetAtomMapNum()
        if map_num == 0:
            continue

        dummy = dummy_elements[map_labels.index(map_num)]
        # at most two hydrogens are replaced for the double-bond dummies, otherwise one
        quota = 2 if dummy in double_bond_dummies else 1

        hydrogens = [nbr for nbr in atom.GetNeighbors() if nbr.GetAtomicNum() == 1]
        for hydrogen in hydrogens[:quota]:
            mol.GetAtomWithIdx(hydrogen.GetIdx()).SetAtomicNum(dummy)

    mol = Chem.RemoveHs(mol)
    return to_smile(mol)

def remove_reconstruction_atoms(smile: str # synthon reconstruction string
                               ) -> str: # synthon smiles string
    '''
    removes dummy atoms for fusion

    Every atom whose element is one of the placeholder elements is turned
    back into hydrogen before the SMILES is regenerated
    '''
    dummy_elements = {23, 74, 72, 104, 105, 106, 107, 108, 109}
    mol = to_mol(smile)

    placeholders = [atom for atom in mol.GetAtoms() if atom.GetAtomicNum() in dummy_elements]
    for atom in placeholders:
        atom.SetAtomicNum(1)

    # add-then-remove pass over hydrogens; presumably this folds the restored
    # hydrogen atoms back into the heavy atoms (e.g. `[CH:10]`) - confirm against RDKit docs
    mol = Chem.RemoveHs(Chem.AddHs(mol))

    return to_smile(mol)

In [None]:
# Adding reconstruction atoms: each `[CH:10]` is expected to lose its hydrogen and gain a `[V]` neighbour
assert add_reconstruction_atoms('CC1(C)CC(N[CH:10]=O)CC(C)(CNC(=O)NCc2cc([CH:10]=O)ccn2)C1'
                        ) == 'CC1(C)CC(N[C:10](=O)[V])CC(C)(CNC(=O)NCc2cc([C:10](=O)[V])ccn2)C1'

# Removing reconstruction atoms: the `[V]` placeholders are expected to turn back into the hydrogen of `[CH:10]`
assert remove_reconstruction_atoms('CC1(C)CC(N[C:10](=O)[V])CC(C)(CNC(=O)NCc2cc([C:10](=O)[V])ccn2)C1'
                           ) == 'CC1(C)CC(N[CH:10]=O)CC(C)(CNC(=O)NCc2cc([CH:10]=O)ccn2)C1'

# Round trip: add followed by remove should give back the input SMILES unchanged
assert remove_reconstruction_atoms(add_reconstruction_atoms(
    'CC1(C)CC(N[CH:10]=O)CC(C)(CNC(=O)NCc2cc([CH:10]=O)ccn2)C1'
    )) == 'CC1(C)CC(N[CH:10]=O)CC(C)(CNC(=O)NCc2cc([CH:10]=O)ccn2)C1'

In [None]:
#| export

class Synthon(Molecule):
    'Molecule subclass for a synthon; also holds its reconstruction form and reaction marks'
    def __init__(self,
                 synthon_smile: str, # synthon smiles string
                 reaction_tags: list[str]=None, # reaction class tags
                 parents: Optional[list[Molecule]]=None, # parent molecule
                 data: Optional[dict]=None # data
                ):
        super().__init__(synthon_smile, data)

        # optional metadata is recorded only when a non-empty value was passed
        for key, value in (('parents', parents), ('reaction_tags', reaction_tags)):
            if value:
                self.add_data({key : value})

        # reconstruction form: the synthon with dummy atoms added at the marked positions
        recon_smile = add_reconstruction_atoms(synthon_smile)
        self.recon_smile = recon_smile
        self.recon_mol = to_mol(recon_smile)
        self.marks = set(get_synthon_marks(recon_smile))

        # union of every mark that may be paired with one of this synthon's marks;
        # marks missing from the table contribute nothing
        partner_lists = [SYNTHON_VALID_COMBINATIONS.get(mark, []) for mark in self.marks]
        self.compatible_marks = set(flatten_list(partner_lists))

    def is_compatible(self, synthon: Synthon) -> bool:
        'True when `synthon` carries at least one mark found in `self.compatible_marks`'
        return not self.compatible_marks.isdisjoint(synthon.marks)

In [None]:
#| export

def molecule_to_synthon(molecule: Molecule # input Molecule
                       ) -> list[Synthon]: # output list of synthons
    '''
    Converts `molecule` into a list of corresponding synthons

    Each synthon is created with its reaction tags and with `[molecule]` as its parents
    '''
    synthon_smiles, rxn_tags = smile_to_synthon(molecule.smile)
    return [Synthon(synthon_smile, tags, [molecule])
            for synthon_smile, tags in zip(synthon_smiles, rxn_tags)]

In [None]:
# The example molecule is expected to give two synthons that both record it as
# their only parent; the first synthon's reconstruction SMILES is pinned as well
molecule = Molecule('COC(=O)c1ccnc(CNC(=O)NCC2(C)CC(N=C=O)CC(C)(C)C2)c1')
synthons = molecule_to_synthon(molecule)
assert len(synthons)==2
assert synthons[0].data['parents'] == [molecule]
assert synthons[0].data['parents'] == synthons[1].data['parents']
assert synthons[0].recon_smile == 'CC1(C)CC(N[C:10](=O)[V])CC(C)(CNC(=O)NCc2cc(C(=O)O)ccn2)C1'

In [None]:
#| export

class FusionReaction():
    'Named reaction SMARTS used to fuse the reconstruction forms of synthons into product synthons'
    def __init__(self, 
                 name: str, # reaction name
                 rxn_smarts: str # reaction smarts
                ):
        self.name = name
        self.rxn_smarts = rxn_smarts
        self.rxn = Reactions.ReactionFromSmarts(rxn_smarts)
        self.rxn.Initialize()
        
    def is_reactant(self, 
                    synthon1: Synthon, # first reactant
                    synthon2: Optional[Synthon]=None # second reactant
                   ) -> bool: # bool, True if synthons match reaction pattern
        '''
        checks synthons against the reactant templates of the reaction

        With one synthon, asks the reaction whether it is a reactant. With two
        synthons, returns True if they match the first two reactant templates
        in either order. Any ordinary error raised during the two-synthon check
        (for example a reaction with fewer than two templates) yields False.
        '''
        if synthon2 is None:
            return self.rxn.IsMoleculeReactant(synthon1.recon_mol)

        reactants = self.rxn.GetReactants()
        try:
            order1 = [synthon1.recon_mol.HasSubstructMatch(reactants[0]),
                      synthon2.recon_mol.HasSubstructMatch(reactants[1])]

            order2 = [synthon2.recon_mol.HasSubstructMatch(reactants[0]),
                      synthon1.recon_mol.HasSubstructMatch(reactants[1])]

            return all(order1) or all(order2)
        except Exception:
            # best effort: a failed check counts as "no match". `Exception` rather than
            # a bare except so KeyboardInterrupt / SystemExit are not swallowed
            return False
    
    def _react(self, 
               synthon1: Synthon, # first reactant
               synthon2: Synthon # second reactant
              ) -> list[str]: # list of product SMILES strings
        '''
        runs the reaction on the reconstruction mols and returns product SMILES

        The reactants are tried as (synthon1, synthon2) first; the swapped
        order is only tried when the first order gives no products
        '''
        products = self.rxn.RunReactants((synthon1.recon_mol, 
                                          synthon2.recon_mol))
        if not products:
            products = self.rxn.RunReactants((synthon2.recon_mol, 
                                              synthon1.recon_mol))

        # product sets come back nested; flatten them to a single list of mols
        products = flatten_list(products) if products else []
        return [to_smile(i) for i in products]
    
    def react(self, 
              synthon1: Synthon, # synthon reactant 1
              synthon2: Synthon # synthon reactant 2
             ) -> list[Synthon]: # list of product synthons
        '''
        fuses two synthons and returns the products as `Synthon` objects

        Each product is tagged with this reaction's name and has both input synthons as parents
        '''
        outputs = []
        for recon_smile in self._react(synthon1, synthon2):
            synthon_smile = remove_reconstruction_atoms(recon_smile)
            result = Synthon(synthon_smile, reaction_tags=[self.name], parents=[synthon1, synthon2])
            outputs.append(result)
        return outputs
    
    def react_to_dict(self, 
                      synthon1: Synthon, # synthon reactant 1
                      synthon2: Synthon # synthon reactant 2
                     ) -> list[dict]: # dictionary of product synthon strings
        '''
        fuses two synthons and returns one plain dict per product

        Each dict holds the synthon SMILES, the reconstruction SMILES it was
        derived from, and the reaction name under `reaction_tags` (a plain
        string here, not a list)
        '''
        outputs = []
        for recon_smile in self._react(synthon1, synthon2):
            synthon_smile = remove_reconstruction_atoms(recon_smile)
            
            result = {
                'synthon_smile' : synthon_smile,
                'reconstruction_smile' : recon_smile,
                'reaction_tags' : self.name
            }            
            outputs.append(result)
        return outputs
    
    def __repr__(self) -> str:
        return f'Reaction: {self.name}'

In [None]:
#| export

class ReactionGroup():
    'holds reactions belonging to the same type of transform'
    def __init__(self, 
                 name: str, # group name
                 reactions: list[FusionReaction] # list of reactions in group
                ):
        self.name = name
        self.reactions = reactions
        
    def get_matching_reactions(self, 
                               synthon1: Synthon, # first synthon reactant
                               synthon2: Optional[Synthon]=None # second synthon reactant 
                              ) -> list[FusionReaction]: # list of matching reactions
        '''
        checks input synthons against `self.reactions` and returns matches
        '''
        return [i for i in self.reactions if i.is_reactant(synthon1, synthon2)]
    
    def dump(self) -> dict:
        'serializes the group to a plain dict of the group name plus the name and SMARTS of each reaction'
        output = {
            'name' : self.name,
            'reactions' : [
                {'name' : i.name, 'smarts' : i.rxn_smarts} for i in self.reactions
            ]
        }
        return output
        
    @classmethod
    def from_dict(cls, input_dict: dict) -> ReactionGroup:
        '''
        builds a group from a dict in the format produced by `dump`

        `name` is required; a missing `reactions` key gives an empty group
        '''
        reactions = [FusionReaction(i['name'], i['smarts']) for i in input_dict.get('reactions', [])]
        return cls(input_dict['name'], reactions)
    
    @classmethod
    def from_file(cls, filename: str) -> ReactionGroup:
        'loads a group from a JSON file holding a dict in the format produced by `dump`'
        # explicit UTF-8 so the result does not depend on the platform's default encoding
        with open(filename, 'r', encoding='utf-8') as f:
            input_dict = json.load(f)
        return cls.from_dict(input_dict)
    
    def __repr__(self):
        return f'Reaction Class: {self.name}' + '\n\t' + '\n\t'.join([repr(i) for i in self.reactions])

In [None]:
#| export

class ReactionUniverse():
    'named collection of reaction groups that can be queried together'
    def __init__(self, 
                 name: str, # rxn universe name
                 reaction_groups: list[ReactionGroup] # list of reaction groups
                ):
        self.name = name
        # stored as a copy so `add_group` never appends to the list the caller
        # passed in (which may be shared, e.g. a module-level list)
        self.reaction_groups = list(reaction_groups)
        self.reaction_groups_dict = {i.name : i for i in self.reaction_groups}
        
    def add_group(self, reaction_group: ReactionGroup # group to add
                 ):
        'adds `reaction_group` to the universe; a group with the same name replaces the earlier entry in `reaction_groups_dict`'
        self.reaction_groups.append(reaction_group)
        self.reaction_groups_dict[reaction_group.name] = reaction_group
        
    def get_matching_reactions(self, 
                               synthon1: Synthon, # synthon reactant 1
                               synthon2: Optional[Synthon]=None # synthon reactant 2
                              ) -> list[FusionReaction]: # list of matching reactions
        'collects the matching reactions of every group, in group order'
        outputs = []
        for group in self.reaction_groups:
            outputs += group.get_matching_reactions(synthon1, synthon2)
        return outputs

In [None]:
#| export

# Names of the bundled reaction groups. Each name is also the stem of a JSON
# file under FUSION_REACTION_PATH, so the spelling has to match the file names exactly
REACTION_GROUP_NAMES = [
    'O-acylation',
    'Olefination',
    'Condensation_of_Y-NH2_with_carbonyl_compounds',
    'Amine_sulphoacylation',
    'C-C couplings',
    'Radical_reactions',
    'N-acylation',
    'O-alkylation_arylation',
    'Metal organics C-C bong assembling',
    'S-alkylation_arylation',
    'Alkylation_arylation_of_NH-lactam',
    'Alkylation_arylation_of_NH-heterocycles',
    'Amine_alkylation_arylation',
]

# directory inside the installed package that holds the reaction JSON files
FUSION_REACTION_PATH = chem_templates.__path__[0] + '/synt_on/fusion_reactions'

# every group is loaded from disk when this module is imported
REACTION_GROUPS = [
    ReactionGroup.from_file(f'{FUSION_REACTION_PATH}/{group_name}.json')
    for group_name in REACTION_GROUP_NAMES
]
REACTION_GROUP_DICT = {group.name : group for group in REACTION_GROUPS}

In [None]:
# With a single synthon the universe checks it against the reactions of every
# group; 9 matches are expected for the first synthon of the example molecule
rxn_universe = ReactionUniverse('all_reactions', REACTION_GROUPS)
molecule = Molecule('COC(=O)c1ccnc(CNC(=O)NCC2(C)CC(N=C=O)CC(C)(C)C2)c1')
synthons = molecule_to_synthon(molecule)
assert len(rxn_universe.get_matching_reactions(synthons[0])) == 9

In [None]:
#| hide
# Runs nbdev's export step; presumably this writes the `#| export` cells to the
# module named by `#| default_exp` - confirm against the nbdev docs
import nbdev; nbdev.nbdev_export()