In [1]:
from rdkit import Chem
import useful_rdkit_utils as uru
from rdkit.Chem.Draw import MolsToGridImage
from rdkit.Chem.Scaffolds import MurckoScaffold
import itertools
import pandas as pd
from collections import Counter
import mols2grid

ModuleNotFoundError: No module named 'mols2grid'

Set the default image size

In [None]:
# Set the default molecule image size to 300 x 300
# (units are presumably pixels — confirm against useful_rdkit_utils).
uru.rd_set_image_size(300,300)

Create a demo molecule

In [None]:
# Demo molecule. The SMILES is written as a raw string because it contains
# backslashes (SMILES bond-direction symbols): in a normal string literal
# `\S` and `\C` are invalid escape sequences, which newer Python versions
# report with a SyntaxWarning. The resulting string value is unchanged.
smi = r"CCn1c(=O)/c(=C2\Sc3ccccc3N2C)s/c1=C\C1CCC[n+]2c1sc1ccccc12"
mol = Chem.MolFromSmiles(smi)
# Bare expression so the notebook renders the molecule.
mol

Instantiate a RingSystemFinder object

In [None]:
# This finder instance is reused by the step-by-step cells that follow.
ring_system_finder = uru.RingSystemFinder()

In order to demo the algorithm, we'll walk through it step by step.  First, we'll tag the bonds that shouldn't be cleaved when identifying ring systems.

In [None]:
# Step 1: tag the bonds of `mol` that must not be cleaved.
# The return value is not used, so this presumably modifies `mol` in place — confirm.
ring_system_finder.tag_bonds_to_preserve(mol)

Now look at the structure with those bonds highlighted.

In [None]:
# Display `mol` again now that tag_bonds_to_preserve has been applied to it.
mol

Next, we cleave the tagged bonds.

In [None]:
# Step 2: cleave the linker bonds of `mol`; the result is kept in `m1`.
m1 = ring_system_finder.cleave_linker_bonds(mol)
# Bare expression so the notebook renders the cleaved molecule.
m1

Now we can look at the ring systems.

In [None]:
# Step 3: clean up the fragments of the cleaved molecule to obtain the ring systems.
# `rx` is passed straight to MolsToGridImage, so it is presumably a list of RDKit molecules.
rx = ring_system_finder.cleanup_fragments(m1)
MolsToGridImage(rx)

That was just to show how the algorithm works.  Of course, this can also be done in one step.

In [None]:
# One-step equivalent of the tag / cleave / cleanup cells above.
# as_mols=True presumably requests molecule objects rather than SMILES — the result
# is drawn directly with MolsToGridImage.
ring_system_list = ring_system_finder.find_ring_systems(mol, as_mols=True)
MolsToGridImage(ring_system_list)

In [None]:
# Same one-step call with keep_dummy=True; judging by the variable name this keeps
# the dummy atoms that mark the R-group attachment points — confirm against the library.
ring_system_list_with_Rgroups = ring_system_finder.find_ring_systems(mol, as_mols=True, keep_dummy=True)
MolsToGridImage(ring_system_list_with_Rgroups)

In [None]:
# Example data: a space-delimited file from the useful_rdkit_utils repository.
# Column names are supplied here, so the first line of the file is read as data.
url = "https://raw.githubusercontent.com/PatWalters/useful_rdkit_utils/master/data/test.smi"
df = pd.read_csv(
    url,
    sep=" ",
    names=["SMILES", "Name"],
)

In [None]:
# Display the loaded table (columns: SMILES, Name).
df

In [None]:
# Fresh finder for the batch run over the whole table.
ring_system_finder = uru.RingSystemFinder()

# Parse each SMILES into an RDKit molecule, then extract the ring systems per molecule.
# NOTE: the column name "ring_sytems" (sic) is kept as-is because later cells read it.
df["mol"] = df["SMILES"].apply(Chem.MolFromSmiles)
df["ring_sytems"] = df["mol"].apply(ring_system_finder.find_ring_systems)

In [None]:
# Inspect the per-molecule ring-system results as a raw array.
df.ring_sytems.values

In [None]:
# Flatten the per-molecule results into one list. This rebinds `ring_system_list`,
# which earlier held the molecule objects for the demo molecule.
ring_system_list = list(itertools.chain.from_iterable(df["ring_sytems"].values))

# Count how often each ring system occurs and order the table by descending count.
ring_count_df = pd.DataFrame(
    Counter(ring_system_list).items(),
    columns=["SMILES", "Count"],
).sort_values("Count", ascending=False)

In [None]:
# Interactive grid of ring_count_df; each cell shows the "img" and "Count" fields
# ("img" is presumably the structure image generated by mols2grid — confirm).
mols2grid.display(ring_count_df,subset=["img","Count"])

In [None]:
# Run every input SMILES through RingSystemLookup.process_smiles;
# `res` holds one lookup result per row of df.
ring_system_lookup = uru.RingSystemLookup()
res = df["SMILES"].apply(ring_system_lookup.process_smiles)

In [None]:
# Inspect the per-molecule lookup results.
res

In [None]:
# Reduce each lookup result with uru.get_min_ring_frequency and store the
# outcome as the `ring_freq` column of df.
df['ring_freq'] = res.apply(uru.get_min_ring_frequency)

In [None]:
# Display the table, which now also carries the `ring_freq` column.
df

In [None]:
# Interactive grid of df; each cell shows the "img" and "ring_freq" fields
# ("img" is presumably the structure image generated by mols2grid — confirm).
mols2grid.display(df,subset=["img","ring_freq"])