# Align Murcko Scaffolds

In [1]:
%load_ext autoreload
%autoreload 2

### Libraries

In [2]:
import open3d as o3d
import numpy as np
import seaborn as sns
import pandas as pd

from numpy.random import default_rng

import re, os
from io import StringIO

import tqdm
from tqdm.auto import trange

import py3Dmol

import molgrid

from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import PandasTools
from rdkit.Chem.Scaffolds import MurckoScaffold as MS

from openbabel import pybel

import ipywidgets as widgets

import copy

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


  return f(*args, **kwds)
  return f(*args, **kwds)
INFO - 2021-09-14 11:02:00,650 - __init__ - Enabling RDKit 2021.03.1 jupyter extensions


In [3]:
from utils import show_molecule_idx, show_all_conformers
from utils import AlignShow, translate_and_rotate

import sys

sys.path.append("../../ligan-EVOTEC")

from molgrid_to_pcd import mol_to_grid, grid_to_pcd
from molgrid_diff import grid_diff
from fit_to_grid import molgrid_diff_to_mol

In [4]:
path = "ligands/BRD4"
files = [
    os.path.join(path, f)
    for f in os.listdir(path)
    if os.path.splitext(f)[-1] == ".pcd" and os.path.splitext(f)[0][-4:] == "tran"
]

# Sort BRD4 ligand files by number
r = re.compile("\d{1,2}")
files.sort(key=lambda f: int(r.search(os.path.basename(f)).group()))

print(files)

pcds = []
mols = []
for f in files:
    pcd = o3d.io.read_point_cloud(f)
    pcds.append(pcd)

    s = Chem.SDMolSupplier(f.replace(".pcd", ".sdf"))
    mol = next(s)
    mols.append(mol)

['ligands/BRD4/ligand-1_tran.pcd', 'ligands/BRD4/ligand-2_tran.pcd', 'ligands/BRD4/ligand-3_tran.pcd', 'ligands/BRD4/ligand-4_tran.pcd', 'ligands/BRD4/ligand-5_tran.pcd', 'ligands/BRD4/ligand-6_tran.pcd', 'ligands/BRD4/ligand-7_tran.pcd', 'ligands/BRD4/ligand-8_tran.pcd', 'ligands/BRD4/ligand-9_tran.pcd', 'ligands/BRD4/ligand-10_tran.pcd']


In [5]:
_ = widgets.interact(
    lambda index: show_molecule_idx(index, mols),
    index=widgets.IntSlider(min=0, max=len(mols) - 1, step=1, value=1),
)

interactive(children=(IntSlider(value=1, description='index', max=9), Output()), _dom_classes=('widget-interac…

In [6]:
msMols = [MS.GetScaffoldForMol(mol) for mol in mols]

In [7]:
_ = widgets.interact(
    lambda index: show_molecule_idx(index, msMols),
    index=widgets.IntSlider(min=0, max=len(mols) - 1, step=1, value=1),
)

interactive(children=(IntSlider(value=1, description='index', max=9), Output()), _dom_classes=('widget-interac…

Write scaffolds to file:

In [8]:
for idx, mol in enumerate(msMols):
    # Randomly translate and rotate Murcko scaffolds
    translate_and_rotate(mol)

    with Chem.SDWriter(os.path.join(path, f"murcko_{idx}.sdf")) as w:
        w.write(mol, confId=0)

In [9]:
mkpcds = []
mkmols = []

files = [os.path.join(path, f"murcko_{i}.sdf") for i in range(len(mols))]

print(files)

typer = molgrid.FileMappedGninaTyper("../files/ligmap")

for f in tqdm.tqdm(files):
    # Load molecule as RDKit molecule
    s = Chem.SDMolSupplier(f)
    mol = next(s)
    mkmols.append(mol)

    # Load molecule as OpenBabel molecule
    obmol = next(pybel.readfile("sdf", f))

    grid, center = mol_to_grid(obmol, 23.5, 0.5, typer)
    pcd = grid_to_pcd(
        grid,
        center,
        23.5,
        0.5,
        typer,
    )

    # pcd = o3d.io.read_point_cloud(f)
    mkpcds.append(pcd)

print(mkpcds)

['ligands/BRD4/murcko_0.sdf', 'ligands/BRD4/murcko_1.sdf', 'ligands/BRD4/murcko_2.sdf', 'ligands/BRD4/murcko_3.sdf', 'ligands/BRD4/murcko_4.sdf', 'ligands/BRD4/murcko_5.sdf', 'ligands/BRD4/murcko_6.sdf', 'ligands/BRD4/murcko_7.sdf', 'ligands/BRD4/murcko_8.sdf', 'ligands/BRD4/murcko_9.sdf']


100%|██████████| 10/10 [00:03<00:00,  3.15it/s]

[PointCloud with 457 points., PointCloud with 475 points., PointCloud with 382 points., PointCloud with 352 points., PointCloud with 581 points., PointCloud with 496 points., PointCloud with 562 points., PointCloud with 492 points., PointCloud with 609 points., PointCloud with 608 points.]





## Align Murcko Scaffold with Original Molecule

### Reconstruction

In [10]:
def reconstruction(fname, verbose=False):
    """
    Reconstruct molecule from file.
    Files is assumed to contained the scaffold (confId=0)
    aligned to its original molecule (confId=1)
    """
    typer = molgrid.FileMappedGninaTyper("../files/ligmap")

    sdfile = pybel.readfile("sdf", fname)
    obmol1 = next(sdfile)
    obmol2 = next(sdfile)

    gdiff, c = grid_diff(obmol1, obmol2, 23.5, 0.5, typer)

    # Convert center to numpy array
    c = np.array([c[0], c[1], c[2]])

    npgdiff = gdiff.cpu().detach().numpy()

    # Load Murcko scaffold as RDKit molecule
    rdscaffold = next(Chem.SDMolSupplier(fname, removeHs=True))

    # Fit atoms into density difference
    # Link nearest atom from the fit to the scaffold to build whole molecule
    rdmolfinal = molgrid_diff_to_mol(
        npgdiff, c, 0.5, "../files/ligmap", rdscaffold, verbose=verbose
    )

    return rdmolfinal

In [11]:
molid = 0
als = AlignShow([mols[molid], mkmols[molid]], [pcds[molid], mkpcds[molid]])
s = als.align(1, 0)
print("Score:", s)
als.save(1, 0, f"murcko_self_{molid}.sdf")
als.show(1, 0)

Score: 0.9387308533916849


<py3Dmol.view at 0x7f1ed0e1be80>

In [12]:
rdmol0 = reconstruction("murcko_self_0.sdf")
show_all_conformers(rdmol0)

In [13]:
molid = 1
als = AlignShow([mols[molid], mkmols[molid]], [pcds[molid], mkpcds[molid]])
s = als.align(1, 0)
print("Score:", s)
als.save(1, 0, f"murcko_self_{molid}.sdf")
als.show(1, 0)

Score: 0.9473684210526315


<py3Dmol.view at 0x7f1db0da2668>

In [14]:
rdmol1 = reconstruction("murcko_self_1.sdf")
show_all_conformers(rdmol1)

In [15]:
molid = 2
als = AlignShow([mols[molid], mkmols[molid]], [pcds[molid], mkpcds[molid]])
s = als.align(1, 0)
print("Score:", s)
als.save(1, 0, f"murcko_self_{molid}.sdf")
als.show(1, 0)

Score: 0.8900523560209425


<py3Dmol.view at 0x7f1daa3ffc88>

In [16]:
rdmol2 = reconstruction("murcko_self_2.sdf")
show_all_conformers(rdmol2)

In [17]:
molid = 3
als = AlignShow([mols[molid], mkmols[molid]], [pcds[molid], mkpcds[molid]])
s = als.align(1, 0)
print("Score:", s)
als.save(1, 0, f"murcko_self_{molid}.sdf")
als.show(1, 0)

Score: 0.7727272727272727


<py3Dmol.view at 0x7f1daa420518>

In [18]:
rdmol3 = reconstruction("murcko_self_3.sdf")
show_all_conformers(rdmol3)

In [19]:
molid = 4
als = AlignShow([mols[molid], mkmols[molid]], [pcds[molid], mkpcds[molid]])
s = als.align(1, 0)
print("Score:", s)
als.save(1, 0, f"murcko_self_{molid}.sdf")
als.show(1, 0)

Score: 0.919104991394148


<py3Dmol.view at 0x7f1daa4209b0>

In [20]:
rdmol4 = reconstruction("murcko_self_4.sdf")
show_all_conformers(rdmol4)

In [21]:
molid = 5
als = AlignShow([mols[molid], mkmols[molid]], [pcds[molid], mkpcds[molid]])
s = als.align(1, 0)
print("Score:", s)
als.save(1, 0, f"murcko_self_{molid}.sdf")
als.show(1, 0)

Score: 0.9153225806451613


<py3Dmol.view at 0x7f1daa428630>

In [22]:
rdmol5 = reconstruction("murcko_self_5.sdf")
show_all_conformers(rdmol5)

RuntimeError: Pre-condition Violation
	Atomic number not found
	Violation occurred on line 151 in file Code/GraphMol/PeriodicTable.h
	Failed Expression: atomicNumber < byanum.size()
	RDKIT: 2021.03.1
	BOOST: 1_65_1


In [23]:
molid = 6
als = AlignShow([mols[molid], mkmols[molid]], [pcds[molid], mkpcds[molid]])
s = als.align(1, 0)
print("Score:", s)
als.save(1, 0, f"murcko_self_{molid}.sdf")
als.show(1, 0)

Score: 0.5516014234875445


<py3Dmol.view at 0x7f1daa428e80>

In [24]:
rdmol6 = reconstruction("murcko_self_6.sdf")
show_all_conformers(rdmol6)

RDKit ERROR: [11:02:36] 
RDKit ERROR: 
RDKit ERROR: ****
RDKit ERROR: Pre-condition Violation
RDKit ERROR: Atomic number not found
RDKit ERROR: Violation occurred on line 151 in file /opt/rdkit/Code/GraphMol/PeriodicTable.h
RDKit ERROR: Failed Expression: atomicNumber < byanum.size()
RDKit ERROR: ****
RDKit ERROR: 
RDKit ERROR: [11:03:22] Can't kekulize mol.  Unkekulized atoms: 15 17 19 22 23
RDKit ERROR: 


KekulizeException: Can't kekulize mol.  Unkekulized atoms: 15 17 19 22 23


In [25]:
molid = 7
als = AlignShow([mols[molid], mkmols[molid]], [pcds[molid], mkpcds[molid]])
s = als.align(1, 0)
print("Score:", s)
als.save(1, 0, f"murcko_self_{molid}.sdf")
als.show(1, 0)

Score: 0.7012195121951219


<py3Dmol.view at 0x7f1db0da24a8>

In [26]:
rdmol7 = reconstruction("murcko_self_7.sdf")
show_all_conformers(rdmol7)

In [27]:
molid = 8
als = AlignShow([mols[molid], mkmols[molid]], [pcds[molid], mkpcds[molid]])
s = als.align(1, 0)
print("Score:", s)
als.save(1, 0, f"murcko_self_{molid}.sdf")
als.show(1, 0)

Score: 0.9507389162561576


<py3Dmol.view at 0x7f1db0daa550>

In [28]:
rdmol8 = reconstruction("murcko_self_8.sdf")
show_all_conformers(rdmol8)

In [29]:
molid = 9
als = AlignShow([mols[molid], mkmols[molid]], [pcds[molid], mkpcds[molid]])
s = als.align(1, 0)
print("Score:", s)
als.save(1, 0, f"murcko_self_{molid}.sdf")
als.show(1, 0)

Score: 0.7796052631578947


<py3Dmol.view at 0x7f1db0daa518>

In [30]:
rdmol9 = reconstruction("murcko_self_9.sdf")
show_all_conformers(rdmol9)

RDKit ERROR: [11:04:21] Can't kekulize mol.  Unkekulized atoms: 26 27 28 29 34
RDKit ERROR: 


KekulizeException: Can't kekulize mol.  Unkekulized atoms: 26 27 28 29 34
