<a href="https://colab.research.google.com/github/Jahan08/RDKit-application/blob/main/PAINS-alert.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Structural Alert Removal

 #### When working in drug design, we often need filters to identify molecules containing functional groups that may be toxic, reactive, or could interfere with an assay. Here we use 8 functional alert sets: Dundee, BMS, Glaxo, Inpharmatica, LINT, PAINS, MLSMR, SureChEMBL

In [None]:
! pip install rdkit



In [None]:
! pip install useful_rdkit_utils pandas datamol ipywidgets tqdm



In [None]:
! pip install useful_rdkit_utils



In [None]:
import useful_rdkit_utils as uru
import pandas as pd


In [None]:
# Load the exported compound table; the SMILES strings fed to the alert filter
# later in the notebook come from its `Smiles` column.
df = pd.read_csv(filepath_or_buffer='/content/sdfbrowserexport-EZEQKY4X.csv')
# Leaving the frame as the last expression renders it as the cell output.
df

Unnamed: 0,ABase ID,AB Batch,Library,Supplier,Supplier ID,Smiles
0,AB00122949,2,ChemDiv01,ChemDiv,0160-0031,c1(c(cccc2NC(=O)c3ccccc3Br)c2)sc(c4n1)cccc4
1,AB00123147,2,ChemDiv01,ChemDiv,0368-0011,[N+](=O)([O-])c1c(Cl)ccc(c1)C(=O)Nc2nncs2
2,AB00141204,2,ChemDiv01,ChemDiv,3652-0238,N1(c(cc2)ccc2c3ccccc3)C(=O)C(CC1=O)Sc4nnc[nH]4
3,AB00146016,2,ChemDiv01,ChemDiv,4384-0087,N12C(SC=C1c3ccccc3)=NC(=O)\C(=C/c(cc4OC)cc(c4O...
4,AB00389839,7,MLS_FS_2010_2,MLSMR,SMR000072360,Cc1sc(SCCC(Nc2snc(c3ccccc3)n2)=O)nn1
...,...,...,...,...,...,...
136,AB01473847,1,ChemDiv01,ChemDiv,K783-5207,c1(C(=O)NCc2ccccc2)coc(nc(cccc3)c3n4)c14
137,AB01473863,1,ChemDiv01,ChemDiv,K783-5224,C(=C/c1ccccc1C)(/Sc(cccc2)c2N3)\C3=O
138,AB01473912,1,ChemDiv01,ChemDiv,K783-5413,N1(CCCC1=O)c(cc2)ccc2C(=O)NC3CCCCCCC3
139,AB01473928,1,ChemDiv01,ChemDiv,K783-6230,C(=C\c1cccc(C)c1)(/Oc(cccc2)c2N3)\C3=O


In [None]:
import datamol as dm
from ipywidgets import interact
from tqdm.auto import tqdm
from ipywidgets import IntSlider, Dropdown

In [None]:

# Set up a REOS filter restricted to the PAINS alert set.
reos_7 = uru.REOS("PAINS")
# Also request the matching SMARTS: each processed SMILES is then spread over
# the three columns assigned below (rule set, rule name, SMARTS).
reos_7.set_output_smarts(True)
# Run every SMILES through the filter and store the three results per molecule.
df[['rule_set_7','reos_7', 'smarts_7']] = [
    reos_7.process_smiles(smiles) for smiles in df.Smiles
]
df


Unnamed: 0,ABase ID,AB Batch,Library,Supplier,Supplier ID,Smiles,rule_set_7,reos_7,smarts_7
0,AB00122949,2,ChemDiv01,ChemDiv,0160-0031,c1(c(cccc2NC(=O)c3ccccc3Br)c2)sc(c4n1)cccc4,ok,ok,ok
1,AB00123147,2,ChemDiv01,ChemDiv,0368-0011,[N+](=O)([O-])c1c(Cl)ccc(c1)C(=O)Nc2nncs2,ok,ok,ok
2,AB00141204,2,ChemDiv01,ChemDiv,3652-0238,N1(c(cc2)ccc2c3ccccc3)C(=O)C(CC1=O)Sc4nnc[nH]4,ok,ok,ok
3,AB00146016,2,ChemDiv01,ChemDiv,4384-0087,N12C(SC=C1c3ccccc3)=NC(=O)\C(=C/c(cc4OC)cc(c4O...,PAINS,ene_six_het_A(483),[#6]-1(-[#6](~[!#6&!#1]~[#6]-[!#6&!#1]-[#6]-1=...
4,AB00389839,7,MLS_FS_2010_2,MLSMR,SMR000072360,Cc1sc(SCCC(Nc2snc(c3ccccc3)n2)=O)nn1,ok,ok,ok
...,...,...,...,...,...,...,...,...,...
136,AB01473847,1,ChemDiv01,ChemDiv,K783-5207,c1(C(=O)NCc2ccccc2)coc(nc(cccc3)c3n4)c14,ok,ok,ok
137,AB01473863,1,ChemDiv01,ChemDiv,K783-5224,C(=C/c1ccccc1C)(/Sc(cccc2)c2N3)\C3=O,ok,ok,ok
138,AB01473912,1,ChemDiv01,ChemDiv,K783-5413,N1(CCCC1=O)c(cc2)ccc2C(=O)NC3CCCCCCC3,ok,ok,ok
139,AB01473928,1,ChemDiv01,ChemDiv,K783-6230,C(=C\c1cccc(C)c1)(/Oc(cccc2)c2N3)\C3=O,ok,ok,ok


### The `rule_set_7` and `reos_7` columns added to the dataframe hold the name of the rule set and the name of the rule matched by each molecule. If a molecule doesn't match any rule, both columns contain `ok`. We also add a `smarts_7` column with the SMARTS pattern of the matched alert, which is used below to highlight the substructure responsible for the alert.

### Summarise the data and put it into a list that we will use for the visualization

In [None]:
# Count how many molecules hit each alert, leaving out the 'ok' (no-match) rows.
vc = df.query("reos_7 != 'ok'").reos_7.value_counts()
# Build the dropdown options in one pass: a "rule (count)" label paired with
# the [rule, count] value that the widgets read back.
rule_freq = [
    (f"{rule} ({count})", [rule, count])
    for rule, count in vc.items()
]

### Set up the interactive visualization: the trick is to set up the observe method on the rule selector, which contains the rules and the number of molecules matching each rule.

In [None]:
# Dropdown listing each alert (labelled with its hit count) and a slider that
# steps through the molecules matching the selected alert.
rule_selector = Dropdown(
    options=rule_freq,
    description="Rule:",
    layout={'width': 'initial'},
)
mol_selector = IntSlider(
    min=0,
    max=rule_freq[0][1][1] - 1,  # hit count of the first rule, minus one
    description="Molecule:",
)


def update_slider_range(*args):
    """Resize the molecule slider to the selected rule and rewind it to 0.

    The dropdown value is the ``[rule, count]`` list built in ``rule_freq``;
    its second element gives the number of matching molecules.
    """
    n_hits = rule_selector.value[1]
    mol_selector.max = n_hits - 1
    mol_selector.value = 0


# Keep the slider range in sync whenever a different rule is picked.
rule_selector.observe(update_slider_range, 'value')


def foo(x, y):
    """Draw the y-th molecule matching the selected alert, alert highlighted.

    Args:
        x: Value of the rule dropdown, i.e. the ``[rule, count]`` list. It is
            passed to ``query`` as-is; pandas evaluates ``==`` against a
            list-like as a membership test, so this picks the rows whose
            ``reos_7`` equals the rule name. NOTE(review): relies on that
            pandas behaviour, so do not swap in a plain ``==`` mask.
        y: Position of the molecule within the matching rows.

    Returns:
        The image produced by ``dm.viz.lasso_highlight_image`` for the chosen
        row's SMILES, with its ``smarts_7`` pattern as the search molecule.
    """
    row = df.query("reos_7 == @x").iloc[y]
    return dm.viz.lasso_highlight_image(
        target_molecules=row.Smiles,
        search_molecules=row.smarts_7,
        use_svg=False,
    )


interact(foo, x=rule_selector, y=mol_selector);

interactive(children=(Dropdown(description='Rule:', layout=Layout(width='initial'), options=(('ene_six_het_A(4…

### This cell is similar to the cell above except that it displays up to 6 molecules and the substructure matching the alert. Use the menu to examine molecules matching the alerts.