In [65]:
import pandas as pd
from ring_systems import RingSystemLookup, ring_systems_min_score
import mols2grid
from tqdm.auto import tqdm
from rdkit import Chem
from operator import itemgetter
import ipywidgets as widgets
from ipywidgets import interact

Read the output from [REINVENT](https://github.com/MarcusOlivecrona/REINVENT)

In [2]:
# Load the REINVENT output from "memory.csv" (path is relative to the
# notebook's working directory) into a DataFrame used by every later cell.
df = pd.read_csv("memory.csv")

A quick view of the dataframe

In [3]:
# Bare expression as the last line of the cell so the notebook renders the frame.
df

Unnamed: 0.1,Unnamed: 0,smiles,score,likelihood
0,4,c1ccncc1C=C1c2c(cccc2)NC1=O,0.775617,-18.633299
1,3,c1ccc(-n2c(CCC)c(C(=O)Nc3cccc(O)c3)cn2)cc1,0.772078,-25.412125
2,88,Cc1c(C)cc(-n2c3c(cn2)C(NC(c2occc2)=O)CCC3)cc1,0.769490,-22.554949
3,122,n1cc(-c2cn[nH]c2)ccc1C1N2CCCC23C(=O)N(CCC)CC1C3,0.766043,-35.582294
4,0,C(CCC)Cn1cccc(C=C2c3c(C)cccc3NC2=O)c1=O,0.765939,-31.755280
...,...,...,...,...
995,76,c1cc(CN=c2c3cc(OC)c(OC)cc3[nH]c(N(C)CCCN(C)C)n...,0.488092,-24.301456
996,73,c1nccn1CC(C)NS(=O)(c1cccc2cnccc12)=O,0.488084,-29.036330
997,67,c1(OC)cc2c(n(CCCC)c3c2cnn(CC(NC2CCCC2)=O)c3=O)cc1,0.488031,-27.520111
998,43,c1(-c2c(O)cccc2)nc(C(NC2Cc3c(cccc3)C2)=O)ccc1Cl,0.488004,-32.733673


Instantiate a RingSystemLookup object.  This object keeps a dictionary of ring systems in ChEMBL with their frequencies.  Each new ring is scored based on its frequency of occurrence in ChEMBL.

In [4]:
# Shared lookup object; later cells call rsl.process_smiles() on each SMILES.
rsl = RingSystemLookup()

In [9]:
# Compute one ring score per molecule and store it in a new 'ring_score' column.
# tqdm wraps the SMILES array only to show a progress bar.
# NOTE(review): ring_systems_min_score presumably returns the lowest ring-system
# frequency found in the molecule - confirm against ring_systems.
df['ring_score'] = [
    ring_systems_min_score(rsl.process_smiles(smi))
    for smi in tqdm(df.smiles.values)
]

  0%|          | 0/1000 [00:00<?, ?it/s]

Sort the data by ring frequency to identify "odd" rings. 

In [22]:
# Ascending order puts the lowest ring scores first; the original index labels
# are kept, so later cells rely on row position rather than on the index.
df = df.sort_values(by='ring_score', ascending=True)

Display molecules sorted by ring system frequency. 

In [50]:
# Grid view of the sorted molecules, showing the structure image, the ring
# score and the grid id for each entry.
mols2grid.display(
    df,
    smiles_col="smiles",
    subset=["img", "ring_score", "mols2grid-id"],
)

Use the slider below to view the lowest-scoring ring system in each molecule.  You can also enter the index of the molecule in the box to the right of the slider.

In [75]:
@interact(w=(0,len(df)-1,1))
def show(w=0):
    """Show the lowest-scoring ring system of the molecule at row position ``w``.

    Parameters
    ----------
    w : int
        Zero-based position into ``df.smiles.values`` (the current row order
        of ``df``, not its index labels).

    Returns
    -------
    The RDKit molecule built from the selected ring-system SMILES, or ``None``
    when ``rsl.process_smiles`` returns nothing for the molecule.
    """
    res = rsl.process_smiles(df.smiles.values[w])
    # Guard the empty case: indexing res[0] on an empty result raised
    # IndexError (e.g. a molecule for which no ring system is reported).
    if not res:
        print("No ring systems found")
        return None
    # Each entry is indexed as (ring SMILES, score). min() returns the first
    # entry with the smallest score, the same one a stable sort would put at
    # position 0, without reordering the list.
    ring_smiles = min(res, key=itemgetter(1))[0]
    print(ring_smiles)
    return Chem.MolFromSmiles(ring_smiles)

interactive(children=(IntSlider(value=0, description='w', max=999), Output()), _dom_classes=('widget-interact'…