In [1]:
%load_ext autoreload
%autoreload 2
%load_ext jupyter_black

In [2]:
method = "molgrid"

n_frags = 25

In [3]:
import open3d as o3d
from rdkit import Chem
import numpy as np

from collections import defaultdict

import tqdm
import os, sys, re

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


In [4]:
sys.path.append("../../")
sys.path.append("../")

In [5]:
from utils import show, transform_and_add_conformer
from score_pcd import fit_and_score
from molgrid_to_pcd import sensaas_color_groups_rgb_molgrid

print(sensaas_color_groups_rgb_molgrid)


def align(mol1, pcd1, pcd2):
    gfit, cfit, hfit, tran = fit_and_score(
        (pcd1, pcd2),
        voxel_size=0.5,
        threshold=0.5,
        color_groups=sensaas_color_groups_rgb_molgrid,
    )
    transform_and_add_conformer(mol1, tran, fromConfId=0, toConfId=1)
    return gfit, cfit, hfit

[[(0.2, 1.0, 1.0), (0.0, 0.0, 1.0), (0.0, 0.0, 1.0), (0.0, 0.0, 1.0), (0.0, 0.0, 1.0), (0.6, 0.0, 0.0), (0.6, 0.0, 0.0), (0.6, 0.0, 0.0), (0.6, 0.6, 0.0), (0.6, 0.6, 0.0)], [(0.3764705882352941, 0.3764705882352941, 0.3764705882352941), (0.3764705882352941, 0.3764705882352941, 0.3764705882352941), (0.7529411764705882, 0.7529411764705882, 0.7529411764705882), (0.7529411764705882, 0.7529411764705882, 0.7529411764705882), (0.6, 0.2980392156862745, 0.0), (0.6, 0.2980392156862745, 0.0)]]


## Load BRD4 Inhibitors

In [6]:
path = "../ligands/BRD4"

In [7]:
files = []
for f in os.listdir(path):
    fname, ext = os.path.splitext(f)
    if (
        ext == ".pcd"
        and method in fname
        and not "tran" in fname
        and not "murcko" in fname
    ):
        files.append(os.path.join(path, f))

# Sort BRD4 ligand files by number
r = re.compile("\d{1,2}")
files.sort(key=lambda f: int(r.search(os.path.basename(f)).group()))

print(files)

pcds = []
mols = []
for f in files:
    pcd = o3d.io.read_point_cloud(f)
    pcds.append(pcd)

    s = Chem.SDMolSupplier(f.replace(".pcd", ".sdf").replace(f"_{method}", ""))
    mol = next(s)
    mols.append(mol)

['../ligands/BRD4/ligand-1_molgrid.pcd', '../ligands/BRD4/ligand-2_molgrid.pcd', '../ligands/BRD4/ligand-3_molgrid.pcd', '../ligands/BRD4/ligand-4_molgrid.pcd', '../ligands/BRD4/ligand-5_molgrid.pcd', '../ligands/BRD4/ligand-6_molgrid.pcd', '../ligands/BRD4/ligand-7_molgrid.pcd', '../ligands/BRD4/ligand-8_molgrid.pcd', '../ligands/BRD4/ligand-9_molgrid.pcd', '../ligands/BRD4/ligand-10_molgrid.pcd']


## Load Fragments

Load fragments. Only fragments that are considered to be synthetically accessible are considered here, in order to reduce computational resources:

In [8]:
fragpath = path = "../../fragments/VEHICLe-good/"
fragfiles = [
    os.path.join(fragpath, f)
    for f in os.listdir(fragpath)
    if os.path.splitext(f)[-1] == ".pcd"
]

fragfiles = fragfiles[:n_frags]

fragpcds = []
fragmols = []
for ff in tqdm.tqdm(fragfiles):
    pcd = o3d.io.read_point_cloud(ff)
    fragpcds.append(pcd)

    s = Chem.SDMolSupplier(ff.replace(".pcd", ".sdf"))
    mol = next(s)
    fragmols.append(mol)

100%|██████████| 25/25 [00:00<00:00, 1131.91it/s]


## Align Fragments to CDK2 Inhibitors

In [9]:
alignments = {}

for i, f in enumerate(files):
    name = os.path.splitext(os.path.basename(f))[0].replace(f"_{method}", "")

    alignments[name] = defaultdict(list)

    for j, ff in enumerate(tqdm.tqdm(fragfiles, desc="Fragments " + name)):
        fidx = os.path.splitext(os.path.basename(ff))[0].replace(f"fragment_", "")

        gfit, cfit, hfit = align(fragmols[j], fragpcds[j], pcds[i])

        alignments[name]["fragment"].append(int(fidx))
        alignments[name]["gfit"].append(gfit.fitness)
        alignments[name]["cfit"].append(cfit.fitness)
        alignments[name]["hfit"].append(hfit)
        alignments[name]["mol"].append(mols[i])
        alignments[name]["fragmol"].append(fragmols[j])

Fragments ligand-1: 100%|██████████| 25/25 [00:07<00:00,  3.34it/s]
Fragments ligand-2: 100%|██████████| 25/25 [00:06<00:00,  4.02it/s]
Fragments ligand-3: 100%|██████████| 25/25 [00:07<00:00,  3.42it/s]
Fragments ligand-4: 100%|██████████| 25/25 [00:07<00:00,  3.42it/s]
Fragments ligand-5: 100%|██████████| 25/25 [00:06<00:00,  3.70it/s]
Fragments ligand-6: 100%|██████████| 25/25 [00:07<00:00,  3.29it/s]
Fragments ligand-7: 100%|██████████| 25/25 [00:07<00:00,  3.56it/s]
Fragments ligand-8: 100%|██████████| 25/25 [00:07<00:00,  3.51it/s]
Fragments ligand-9: 100%|██████████| 25/25 [00:07<00:00,  3.37it/s]
Fragments ligand-10: 100%|██████████| 25/25 [00:07<00:00,  3.24it/s]


In [10]:
import pandas as pd

d = {}
for outerKey, innerDict in alignments.items():
    for innerKey, values in innerDict.items():
        d[(outerKey, innerKey)] = values

df = pd.DataFrame.from_dict(d)
df = df.stack(level=0).swaplevel().sort_index()
df.index.names = ["lig", "idx"]

In [11]:
df["cfit + hfit"] = df["cfit"] + df["hfit"]
df

Unnamed: 0_level_0,Unnamed: 1_level_0,cfit,fragment,fragmol,gfit,hfit,mol,cfit + hfit
lig,idx,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
ligand-1,0,0.867435,1046,<rdkit.Chem.rdchem.Mol object at 0x7fdd75a0bb80>,0.841499,0.645533,<rdkit.Chem.rdchem.Mol object at 0x7fdd759fe9a0>,1.512968
ligand-1,1,0.941176,0,<rdkit.Chem.rdchem.Mol object at 0x7fdd75a0bbe0>,0.818182,0.716578,<rdkit.Chem.rdchem.Mol object at 0x7fdd759fe9a0>,1.657754
ligand-1,2,0.926380,1,<rdkit.Chem.rdchem.Mol object at 0x7fdd75a0b940>,0.447853,0.625767,<rdkit.Chem.rdchem.Mol object at 0x7fdd759fe9a0>,1.552147
ligand-1,3,0.809783,1047,<rdkit.Chem.rdchem.Mol object at 0x7fdd75a0b9a0>,0.760870,0.581522,<rdkit.Chem.rdchem.Mol object at 0x7fdd759fe9a0>,1.391304
ligand-1,4,0.883582,10,<rdkit.Chem.rdchem.Mol object at 0x7fdd75a0bb20>,0.782090,0.540299,<rdkit.Chem.rdchem.Mol object at 0x7fdd759fe9a0>,1.423881
...,...,...,...,...,...,...,...,...
ligand-9,20,0.807786,1006,<rdkit.Chem.rdchem.Mol object at 0x7fdd75a0d220>,0.749392,0.576642,<rdkit.Chem.rdchem.Mol object at 0x7fdd759feb20>,1.384428
ligand-9,21,0.863636,1055,<rdkit.Chem.rdchem.Mol object at 0x7fdd75a0d280>,0.766667,0.524242,<rdkit.Chem.rdchem.Mol object at 0x7fdd759feb20>,1.387879
ligand-9,22,0.834568,1007,<rdkit.Chem.rdchem.Mol object at 0x7fdd75a0d460>,0.765432,0.592593,<rdkit.Chem.rdchem.Mol object at 0x7fdd759feb20>,1.427160
ligand-9,23,0.900568,1056,<rdkit.Chem.rdchem.Mol object at 0x7fdd75a0d4c0>,0.698864,0.659091,<rdkit.Chem.rdchem.Mol object at 0x7fdd759feb20>,1.559659


In [12]:
def getbest(df, lig, criteria="cfit", k=3):
    return df.query("lig == @lig").sort_values(criteria, ascending=False).head(k)


def showbest(df, lig, criteria="cfit", k=10):
    best = getbest(df, lig, criteria, k)

    for idx, row in best.iterrows():
        p = show(row.fragmol, row.mol)
        print(row[criteria])
        display(p)

In [13]:
showbest(df, "ligand-7", criteria="cfit + hfit")

1.4919354838709677


<py3Dmol.view at 0x7fdd72cf8760>

1.4764705882352942


<py3Dmol.view at 0x7fdd72cf8b50>

1.4696969696969697


<py3Dmol.view at 0x7fde48b602b0>

1.4539877300613497


<py3Dmol.view at 0x7fdd72cf8250>

1.4441176470588235


<py3Dmol.view at 0x7fdd72cf8a30>

1.4440677966101694


<py3Dmol.view at 0x7fde48b602b0>

1.440922190201729


<py3Dmol.view at 0x7fdd72cf8430>

1.4375


<py3Dmol.view at 0x7fdd72cf8a90>

1.4289340101522843


<py3Dmol.view at 0x7fde48b602b0>

1.4236842105263157


<py3Dmol.view at 0x7fdd72cf8400>