In [None]:
# default_exp templates.blocks

# Blocks

> Blocks are used for advanced templating where different templates are applied to different sections of the molecule


In [None]:
#hide
from nbdev.showdoc import *
%load_ext autoreload
%autoreload 2

In [None]:
# export
from mrl.imports import *
from mrl.core import *
from mrl.chem import *
from mrl.templates.filters import *
from mrl.templates.template import *

  return f(*args, **kwds)


In [None]:
# export

class Block():
    """Base class pairing a template with a set of attachment-point links.

    A block may own `subblocks`. Everything that depends on the concrete
    fragment format (matching, mapping, joining, fusing) is a hook that raises
    `NotImplementedError` here and is expected to be supplied by a subclass.

    Args:
        template: object used to screen, score and sample molecules. This class
            calls it as `template(mol, filter_type=...)` and reads
            `template.failscore`, `template.screen_mols` and `template.sample`.
        links: attachment points that fragments of this block must carry.
        name: label used in `__repr__`.
        subblocks: optional list of child blocks. When omitted, each instance
            gets its own new empty list.
    """
    def __init__(self, template, links, name, subblocks=None):
        self.template = template
        self.links = links
        self.name = name
        # A `[]` default would be one list object shared by every instance.
        self.subblocks = [] if subblocks is None else subblocks
        self.sublinks = []

    def update_links(self):
        # updates self.sublinks based on self.subblocks
        raise NotImplementedError

    def match_fragment(self, fragment):
        # determine if fragment matches block link pattern
        raise NotImplementedError

    def match_fragment_recursive(self, fragment):
        """Return True if `fragment` matches this block or any nested subblock."""
        if self.match_fragment(fragment):
            return True

        return any(block.match_fragment_recursive(fragment) for block in self.subblocks)

    def eval_mol(self, mol):
        """Evaluate `mol` against this block's template.

        The hard filters are only run when the SMILES of `mol` matches the
        block's link pattern; the soft score is only computed when the hard
        filters pass.

        Returns:
            `[hardpass, score]`, where `score` is the template's soft score or
            `template.failscore` when the hard check did not pass.
        """
        mol = to_mol(mol)
        smile = to_smile(mol)

        if self.match_fragment(smile):
            hardpass = self.template(mol, filter_type='hard')
        else:
            hardpass = False

        if hardpass:
            score = self.template(mol, filter_type='soft')
        else:
            # NOTE(review): this reads `failscore`, while templates elsewhere in
            # this notebook are built with a `fail_score=` keyword - confirm the
            # attribute name on Template.
            score = self.template.failscore

        return [hardpass, score]

    def add_mapping(self, fragment):
        # converts unmapped fragment to mapped fragment
        raise NotImplementedError

    def remove_mapping(self, fragment):
        # converts mapped fragment to unmapped fragment
        raise NotImplementedError

    def shuffle_mapping(self, fragment):
        # shuffles mapping of attachment points
        raise NotImplementedError

    def check_num_links(self, fragment):
        # checks if the fragment has the same number of attachments as defined in self.links
        raise NotImplementedError

    def process_fragment(self, fragment):
        """Check the attachment count of `fragment` and map it if the count is right.

        Returns:
            `[attachment_pass, fragment]`; the fragment is returned unchanged
            when the attachment check fails.
        """
        attachment_pass = bool(self.check_num_links(fragment))
        if attachment_pass:
            fragment = self.add_mapping(fragment)

        return [attachment_pass, fragment]

    def load_data(self, fragments, recurse=False):
        """Filter `fragments` by attachment count and pass survivors to the template.

        Args:
            fragments: iterable of fragments to screen.
            recurse: when True, every subblock first loads the same (unfiltered)
                `fragments` recursively.
        """
        if recurse:
            for b in self.subblocks:
                b.load_data(fragments, recurse=True)

        processed = maybe_parallel(self.process_fragment, fragments)
        kept = [frag for ok, frag in processed if ok]
        self.template.screen_mols(kept)

    def sample(self, n, log='hard'):
        """Delegate to `template.sample(n, log=log)`."""
        return self.template.sample(n, log=log)

    def decompose_fragments(self, fragment_string):
        # decomposes a string of multiple fragments into a list of single fragments
        raise NotImplementedError

    def join_fragments(self, fragment_list):
        # joins list of fragments into single string
        raise NotImplementedError

    def fuse_fragments(self, fragment_string):
        # fuses fragment string into single output
        raise NotImplementedError

    def join_and_fuse(self, fragment_list):
        """Join `fragment_list` into one string, then fuse it."""
        return self.fuse_fragments(self.join_fragments(fragment_list))

    def recurse_fragments(self, fragment):
        # recursively break down fragments, route to subblocks, fuse and evaluate
        raise NotImplementedError

    def __repr__(self):
        # The template repr and every subblock repr are indented by one tab.
        rep_str = f'Block {self.name}: {self.links}\n\t' + repr(self.template).replace('\n', '\n\t')

        if self.subblocks:
            rep_str += '\n'
            for b in self.subblocks:
                rep_str += '\n\t' + repr(b).replace('\n', '\n\t')

        return rep_str

In [None]:
class MolBlock(Block):
    """Block operating on SMILES fragments with `[isotope*:map_num]` attachment points.

    Args:
        template: template used to screen and score molecules of this block.
        links: list of `(isotope, map_num)` string pairs this block expects.
        name: label for the block.
        subblocks: optional list of child blocks. When omitted, a new empty
            list is used for each instance.
    """
    def __init__(self, template, links, name, subblocks=None):
        # Pass a fresh list rather than a shared mutable default.
        super().__init__(template, links, name,
                         subblocks=[] if subblocks is None else subblocks)

        self.update_links()
        # identifies isotope and map num; raw string avoids invalid `\[` / `\*` escapes
        self.pattern = re.compile(r'\[(.*?)\*:(.*?)]')

    def update_links(self):
        """Collect the links of every subblock, and of their descendants, into `self.sublinks`."""
        for b in self.subblocks:
            self.sublinks.append(b.links)
            self.sublinks.extend(b.sublinks)

    def match_fragment(self, fragment, do_map=True, links=None):
        """Return True if `fragment` carries exactly the expected links.

        Args:
            fragment: SMILES string of a single fragment.
            do_map: when True, `add_mapping` is invoked before matching.
            links: `(isotope, map_num)` pairs to compare against; defaults to
                `self.links`.
        """
        if not self.check_num_links(fragment):
            return False

        if do_map:
            # NOTE(review): the mapped string returned here is discarded, so the
            # comparison below still runs on the caller's `fragment`. The call is
            # kept because it can consume random state via `random.shuffle` -
            # confirm whether `fragment = self.add_mapping(fragment)` was intended.
            self.add_mapping(fragment)

        if links is None:
            links = self.links

        # find isotope/map num pairs and compare them as sets (order-insensitive)
        return set(self.pattern.findall(fragment)) == set(links)

    def add_mapping(self, fragment, links=None):
        """Replace each `*` in `fragment` with a `[isotope*:map_num]` token.

        A fragment that already has as many mapped attachment points as
        `self.links` is returned unchanged. A partially mapped fragment is
        stripped of its mapping first.

        Args:
            fragment: SMILES string.
            links: `(isotope, map_num)` pairs assigned in order of appearance.
                Defaults to a shuffled copy of `self.links`.
        """
        pattern_matches = self.pattern.findall(fragment)

        if len(pattern_matches) == len(self.links):
            # already mapped
            return fragment

        if pattern_matches:
            # partially mapped, something went wrong
            fragment = self.remove_mapping(fragment)

        if links is None:
            links = list(self.links)
            random.shuffle(links)

        pieces = []
        link_count = 0
        for s in fragment:
            if s == '*':
                isotope, map_num = links[link_count]
                s = f'[{isotope}*:{map_num}]'
                link_count += 1

            pieces.append(s)

        return ''.join(pieces)

    def remove_mapping(self, fragment):
        """Replace every `[isotope*:map_num]` token in `fragment` with a bare `*`."""
        for isotope, map_num in self.pattern.findall(fragment):
            fragment = fragment.replace(f'[{isotope}*:{map_num}]', '*')

        return fragment

    def shuffle_mapping(self, fragment):
        """Reassign the fragment's existing `(isotope, map_num)` pairs in random order."""
        new_mapping = list(self.pattern.findall(fragment))
        random.shuffle(new_mapping)

        fragment = self.remove_mapping(fragment)
        return self.add_mapping(fragment, links=new_mapping)

    def check_num_links(self, fragment):
        """True when the number of `*` characters equals `len(self.links)`."""
        return fragment.count('*') == len(self.links)

    def sample_smiles(self, n, log='hard'):
        """Delegate to `template.sample_smiles(n, log=log)`."""
        return self.template.sample_smiles(n, log=log)

    def decompose_fragments(self, fragment_string):
        """Split a `.`-separated fragment string into single fragments."""
        return fragment_string.split('.')

    def join_fragments(self, fragment_list):
        """Join fragments into one `.`-separated string."""
        return '.'.join(fragment_list)

    def fuse_fragments(self, fragment_string):
        """Fuse fragments on their atom mapping, restoring a `0` isotope where it was lost."""
        new_smile = fuse_on_atom_mapping(fragment_string)

        # lost isotope: `[*` is rewritten to `[0*` (no-op when absent)
        return new_smile.replace('[*', '[0*')

    def recurse_fragments(self, fragments):
        """Route fragments to subblocks, fuse the results and evaluate them.

        Args:
            fragments: a fragment string or a list of fragment strings; each
                string may contain several `.`-separated fragments.

        Returns:
            `(fused, total_pass, total_score, output_dicts)`: the fused SMILES,
            whether this block and every routed subblock passed, the summed
            score, and one result dict per evaluated block (subblocks first).
        """
        output_dicts = []
        total_pass = []
        total_score = 0.

        if isinstance(fragments, str):
            fragments = [fragments]

        # flatten every input string into single fragments
        fragments = [frag for item in fragments for frag in self.decompose_fragments(item)]

        if self.subblocks:
            new_fragments = []
            unrouted = list(fragments) # copy list

            for sb in self.subblocks:
                # single-pass partition: the first subblock that matches claims the fragment
                routed, remaining = [], []
                for frag in unrouted:
                    (routed if sb.match_fragment_recursive(frag) else remaining).append(frag)
                unrouted = remaining

                if routed:
                    r_fused, r_pass, r_score, subdicts = sb.recurse_fragments(routed)
                    new_fragments.append(r_fused)
                    total_pass.append(r_pass)
                    total_score += r_score
                    output_dicts += subdicts

                if isinstance(sb, ConstantBlock):
                    # constant blocks never match a fragment; their SMILES is always added
                    new_fragments.append(sb.smile)

            fragments = new_fragments + unrouted

        fused = self.fuse_fragments(self.join_fragments(fragments))

        frag_pass, frag_score = self.eval_mol(fused)
        total_pass.append(frag_pass)
        total_score += frag_score

        total_pass = all(total_pass)

        output_dicts.append({
            'block' : self.name,
            'fused' : fused,
            'fragments' : fragments,
            'block_pass' : frag_pass,
            'block_score' : frag_score,
            'all_pass' : total_pass,
            'all_score' : total_score
        })

        return fused, total_pass, total_score, output_dicts


In [None]:
# export

class ConstantBlock():
    """Block holding a fixed value instead of a template.

    It has no links and no subblocks, never matches a fragment, and ignores
    any data passed to `load_data`.

    Args:
        constant: the fixed value represented by this block.
        name: label for the block.
    """
    def __init__(self, constant, name):
        self.constant = constant
        self.name = name
        self.links = []
        self.subblocks = []
        self.sublinks = []

    def match_fragment(self, fragment):
        """Always False: a constant block does not accept fragments."""
        return False

    def match_fragment_recursive(self, fragment):
        """Always False: there are no subblocks to search."""
        return False

    def load_data(self, fragments, recurse=False):
        """No-op; a constant block has nothing to screen."""
        pass

    def sample_data(self, n):
        """Return a DataFrame of `n` identical rows `(constant, 0.)`.

        Columns are `smiles` and `final`.
        """
        # One two-element row per sample. A flat `[constant, 0.] * n` list is
        # read by pandas as a single column and clashes with the two column names.
        rows = [[self.constant, 0.] for _ in range(n)]
        return pd.DataFrame(rows, columns=['smiles', 'final'])

    def __repr__(self):
        return f'Constant Block: {self.constant}'
    

class ConstantMolBlock(ConstantBlock):
    """Constant block whose value is a SMILES string with mapped attachment points.

    Args:
        smile: SMILES string; `[isotope*:map_num]` tokens define the block's links.
        name: label for the block.
    """
    def __init__(self, smile, name):
        super().__init__(smile, name)
        # `[*` is rewritten to `[0*` after canonicalisation (no-op when absent)
        self.smile = canon_smile(smile).replace('[*', '[0*')
        # raw string avoids the invalid `\[` / `\*` escape sequences
        self.pattern = re.compile(r'\[(.*?)\*:(.*?)]')
        # NOTE(review): links are parsed from the raw `smile` argument, not from
        # the normalised `self.smile` - confirm this is intended.
        self.links = self.pattern.findall(smile)

    def sample_smile(self, n):
        """Return the constant SMILES repeated `n` times."""
        return [self.smile]*n

    def __repr__(self):
        return f'Constant Block: {self.smile}'

In [None]:
# Scheme: one constant scaffold with two variable R-groups

# Scaffold: fixed core with two mapped attachment points
scaffold_smile = 'c1nc2c([0*:2])cncc2cc1[0*:1]'
scaffold_block = ConstantMolBlock(scaffold_smile, name='scaffold')

# R1: must weigh 50-250 g/mol and have exactly 1 ring; ideally 100-200 g/mol
r1_template = Template(
    [MolWtFilter(50, 250), RingFilter(1, 1)],
    [MolWtFilter(100, 200, 1)],
    fail_score=-1,
)
r1_block = MolBlock(r1_template, links=[('1', '1')], name='r1')

# R2: must weigh 0-200 g/mol and have no rings; ideally 50-150 g/mol
r2_template = Template(
    [MolWtFilter(0, 200), RingFilter(None, 0)],
    [MolWtFilter(50, 150, 1)],
    fail_score=-1,
)
r2_block = MolBlock(r2_template, links=[('1', '2')], name='r2')

# Full compound: must weigh between 200 and 550 g/mol
full_template = Template([MolWtFilter(200, 550)], fail_score=-1)

# The top-level block has no links of its own; it fuses the scaffold with R1 and R2
main_block = MolBlock(
    full_template,
    links=[],
    name='full_molecule',
    subblocks=[scaffold_block, r1_block, r2_block],
)

In [None]:
# read the example molecules; the `smiles` column is used by the cells below
df = pd.read_csv('files/smiles.csv')

In [None]:
# fragment every SMILES in the table
# NOTE(review): [1,2] presumably restricts output to fragments with 1 or 2 attachment points - confirm against fragment_smiles
fragments = fragment_smiles(df.smiles.values, [1,2])

In [None]:
# total number of fragments produced
len(fragments)

38451

In [None]:
# how many fragments carry each number of '*' attachment points
pd.DataFrame([i.count('*') for i in fragments])[0].value_counts()

2    22553
1    15898
Name: 0, dtype: int64

In [None]:
# offer the whole molecules to every block; each block keeps only inputs whose
# '*' count equals its number of links before screening them with its template
main_block.load_data(df.smiles.values, recurse=True)

In [None]:
# offer the fragments to every block under the same attachment-count check
main_block.load_data(fragments, recurse=True)

In [None]:
# soft log of the full-molecule template
main_block.template.soft_log

Unnamed: 0,smiles,final
0,CNc1nc(SCC(=O)Nc2cc(Cl)ccc2OC)nc2ccccc12,0
1,COc1ccc(C(=O)Oc2ccc(/C=C3\C(=N)N4OC(C)=CC4=NC3...,0
2,Cc1sc(NC(=O)c2ccccc2)c(C(N)=O)c1C,0
3,COc1ccc(NCc2noc(-c3ccoc3)n2)cc1OC(F)F,0
4,O=C(COC(=O)c1cccc(Br)c1)c1ccc2c(c1)OCCCO2,0
...,...,...
1962,Cc1cccc(C)c1C(=O)Nc1cc(S(=O)(=O)NC2CC2)ccc1O,0
1963,C/C(=N/NC(=O)CN(c1ccccc1Br)S(C)(=O)=O)c1ccc2c(...,0
1964,CCC(=O)Nc1ccc(NC(=O)NCCc2cccs2)cc1C,0
1965,CCC(CC)(NC(=O)Cc1c(F)cccc1F)/C(N)=N/O,0


In [None]:
# soft log of the r1 block (subblocks are ordered scaffold, r1, r2)
main_block.subblocks[1].template.soft_log

Unnamed: 0,smiles,0,final
0,COc1c(C)cnc([1*:1])c1C,1.0,1.0
1,COc1cccc(C(=O)C=C([O-])C(=O)OC[1*:1])c1,0.0,0.0
2,COc1cc(C)ccc1OCCC(=O)N[1*:1],0.0,0.0
3,Cc1ccc(O[1*:1])cc1,1.0,1.0
4,CS(=O)(=O)N(c1ccccc1)[1*:1],1.0,1.0
...,...,...,...
4615,O=[N+]([O-])c1ccccc1N[1*:1],1.0,1.0
4616,Cc1nnc([1*:1])o1,0.0,0.0
4617,CC(=O)c1cccn1[1*:1],1.0,1.0
4618,O=C(COC(=O)Cc1ccccc1F)N[1*:1],0.0,0.0


In [None]:
# soft log of the r2 block
main_block.subblocks[2].template.soft_log

Unnamed: 0,smiles,0,final
0,CC(C)CCNC(=O)NC[1*:2],1.0,1.0
1,N[1*:2],0.0,0.0
2,CC(=NO)[1*:2],1.0,1.0
3,COC(=O)C(C)(C)[1*:2],1.0,1.0
4,CC(C)(CCO)CNC(=O)NC[1*:2],0.0,0.0
...,...,...,...
552,CNC(=O)NC(=O)[1*:2],1.0,1.0
553,COCCO[1*:2],1.0,1.0
554,CN(C)S(=O)(=O)[1*:2],1.0,1.0
555,CC(C)(C)CC[1*:2],1.0,1.0


In [None]:
# two '.'-separated fragments: one tagged [1*:2] (the r2 link) and one tagged [1*:1] (the r1 link)
fragment_string = '[1*:2]C(=O)NC(CO)CC(C)C.[1*:1]S(=O)(=O)NCc1ccc(O)c(C(=O)O)c1'

In [None]:
# fuse both fragments with the scaffold SMILES by hand
main_block.fuse_fragments(fragment_string+'.'+scaffold_block.smile)

'CC(C)CC(CO)NC(=O)c1cncc2cc(S(=O)(=O)NCc3ccc(O)c(C(=O)O)c3)cnc12'

In [None]:
# main_block has no links of its own, so a fragment with one '*' fails its attachment-count check
main_block.match_fragment('[1*:2]C(=O)NC(CO)CC(C)C')

False

In [None]:
# the recursive match also searches the subblocks; r2 is declared with the ('1', '2') link
main_block.match_fragment_recursive('[1*:2]C(=O)NC(CO)CC(C)C')

True

In [None]:
# route each fragment to its subblock, fuse with the scaffold and evaluate;
# returns (fused, total_pass, total_score, output_dicts)
out = main_block.recurse_fragments('[1*:2]C(=O)NC(CO)CC(C)C.[1*:1]S(=O)(=O)NCc1ccc(O)c(C(=O)O)c1')

In [None]:
# fused SMILES
out[0]

'CC(C)CC(CO)NC(=O)c1cncc2cc(S(=O)(=O)NCc3ccc(O)c(C(=O)O)c3)cnc12'

In [None]:
# True when the main block and every routed subblock passed
out[1]

True

In [None]:
# score summed over the main block and the routed subblocks
out[2]

1.0

In [None]:
# frag_df = pd.read_csv('../../chem_research/fragments.csv')
# df = pd.read_csv('/Users/karl/Desktop/bindingDB/filtered_ligands.smi', header=None, sep='\t')
# df.columns = ['smile', 'name']

Clean up docs
add returns between lines in docstrings for better rendering
Make overview page for templates
Make tutorial for enumeration (remove existing from chem notebook)
make tutorial for basic templates
make tutorial for intermediate templates (custom filters, etc)
make tutorial for advanced templates (blocks)
figure out page links in nbdev