In [1]:
from openeye import oechem
from openeye import oedepict

from openeye.oechem import *
from openeye.oedepict import *
import oenotebook as oenb

In [2]:
iname = "../Open-Eye-Toolkit-Jupyter-Notebooks/Finding_Core_Fragment/getcore-test.smi"
rname = "../A.NewCode/matchingmols.pdf"
qfile = "../A.NewCode/querymol.mdl"

qifs = oemolostream()
if not qifs.open(qfile):
    OEThrow.Fatal("Cannot open file for query molecule!")
ifs = oemolistream()
if not ifs.open(iname):
    OEThrow.Fatal("Cannot open target input file!")

    
mol = oechem.OEGraphMol()
oechem.OEParseSmiles(mol, "Cc1cc2ccccc2nc1")
oenb.draw_mol(mol)
qnames = set()
smi = OEMolToSmiles(mol)
if smi not in qnames:
    qnames.add(smi)
    OEWriteMolecule(qifs, mol)


qidx = 1  
ext = OEGetFileExtension(rname)
if not OEIsRegisteredMultiPageImageFile(ext):
    OEThrow.Fatal("Unknown multipage image type!")

In [3]:
# initialize multi-page report
ropts = OEReportOptions()
ropts.SetHeaderHeight(140.0)
report = OEReport(ropts)

# setup depiction options

dopts = OE2DMolDisplayOptions()
cellwidth, cellheight = 200.0, 200.0
dopts.SetDimensions(cellwidth, cellheight, OEScale_AutoScale)

colors = OEGetContrastColors()
highlight = OEHighlightOverlayByBallAndStick(colors)

In [4]:
def GetSubstructureSearches(queryfnames):

    querymols = []
    subsearches = []

    for queryfname in queryfnames:
        querymol, subsearch = GetSubstructureSearch(queryfname)
        querymols.append(OEGraphMol(querymol))
        subsearches.append(OESubSearch(subsearch))

    return querymols, subsearches


def GetSubstructureMatches(subsearches, mol):

    unique = True
    matches = []
    for ss in subsearches:
        miter = ss.Match(mol, unique)
        if not miter.IsValid():
            return []
        else:
            match = miter.Target()
            matches.append(OEAtomBondSet(match.GetTargetAtoms(), match.GetTargetBonds()))

    return matches

def GetSubstructureSearch(queryfname):

    qifs = oemolistream()
    if not qifs.open(queryfname):
        OEThrow.Fatal("Cannot open mdl query file!")
    if qifs.GetFormat() != OEFormat_MDL:
        OEThrow.Fatal("Query file has to be an MDL file!")

    querymol = OEGraphMol()
    if not OEReadMDLQueryFile(qifs, querymol):
        OEThrow.Fatal("Cannot read query molecule!")
    OEPrepareDepiction(querymol)

    qmol = OEQMol()
    queryopts = OEMDLQueryOpts_Default | OEMDLQueryOpts_SuppressExplicitH
    OEBuildMDLQueryExpressions(qmol, querymol, queryopts)

    subsearch = OESubSearch()
    if not subsearch.Init(qmol):
        OEThrow.Fatal("Cannot initialize substructure search!")
    subsearch.SetMaxMatches(1)

    return (querymol, subsearch)

In [5]:
def DepictMoleculesWithSubstructureMatches(report, mollist, subsearches, opts, colors):

    highlight = OEHighlightOverlayByBallAndStick(colors)
    borderpen = OEPen(OEWhite, OELightGrey, OEFill_Off, 1.0)

    for mol in mollist:

        matches = GetSubstructureMatches(subsearches, mol)
        if len(matches) == 0:  # at least one substructure search fails
            continue 

        OEPrepareDepiction(mol)
        disp = OE2DMolDisplay(mol, opts)
        OEAddHighlightOverlay(disp, highlight, matches)

        cell = report.NewCell()
        OERenderMolecule(cell, disp)
        OEDrawBorder(cell, borderpen)


def DepictQueries(report, queries, colors):

    for header in report.GetHeaders():

        grid = OEImageGrid(header, 1, len(queries))
        grid.SetCellGap(4)
        cellwidth, cellheight = grid.GetCellWidth(), grid.GetCellHeight()
        opts = OE2DMolDisplayOptions(cellwidth, cellheight, OEScale_AutoScale)

        colors.ToFirst()
        for cell, query, color in zip(grid.GetCells(), queries, colors):
            disp = OE2DMolDisplay(query, opts)
            OERenderMolecule(cell, disp)
            OEDrawBorder(cell, OEPen(OEWhite, color, OEFill_Off, 4.0))

In [6]:
# read and initialize MDL queries
queries, subsearches = GetSubstructureSearches(qnames)
OEThrow.Info("Number of queries %d" % len(queries))

# import molecules

mollist = []
for mol in ifs.GetOEGraphMols():
    for ss in subsearches:
        OEPrepareSearch(mol, ss)
    mollist.append(OEGraphMol(mol))

# depict hit meoclules with highlighting matches

DepictMoleculesWithSubstructureMatches(report, mollist, subsearches, dopts, colors)
if report.NumPages() == 0:
    OEThrow.Info("No match found!")

# render each query structures into headers

DepictQueries(report, queries, colors)

OEWriteReport(rname, report)

Fatal: Cannot open mdl query file!
Fatal: Query file has to be an MDL file!
Fatal: Cannot read query molecule!
Fatal: Cannot initialize substructure search!
Number of queries 1











































































































































































































































































































































































































































































No match found!


False

In [7]:
class PDF(object):
  def __init__(self, pdf, size=(200,200)):
    self.pdf = pdf
    self.size = size

  def _repr_html_(self):
    return '<iframe src={0} width={1[0]} height={1[1]}></iframe>'.format(self.pdf, self.size)

  def _repr_latex_(self):
    return r'\includegraphics[width=1.0\textwidth]{{{0}}}'.format(self.pdf)

In [8]:
PDF('../A. NewCode/matchingmols.pdf', size=(800,1000))