# Align VEHICLe Fragments to CDK2 Inhibitors

In [16]:
%load_ext autoreload
%autoreload 2
%load_ext jupyter_black

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
The jupyter_black extension is already loaded. To reload it, use:
  %reload_ext jupyter_black


In [17]:
# Tag selecting which ligand point-cloud files are used: only ".pcd" files whose
# name contains this string are loaded, and "_<method>" is stripped from the
# file stem to recover the ligand name.
method = "molgrid"

In [18]:
import open3d as o3d
from rdkit import Chem
import numpy as np

from collections import defaultdict

import tqdm
import os, sys

import pickle

In [19]:
# Extend the module search path with the two directories above the working
# directory so that project-local modules can be imported
# (presumably this is where `utils` lives — TODO confirm).
sys.path.append("../../")
sys.path.append("../")

In [20]:
from utils import align, show

## Load CDK2 Inhibitors

In [21]:
# Directory holding the CDK2 ligand files (".pcd" point clouds and ".sdf" structures)
path = "../ligands/CDK2"

In [22]:
# Collect the ligand point clouds generated with the selected method,
# leaving out any file whose name contains "tran"
files = [
    os.path.join(path, entry)
    for entry in os.listdir(path)
    if os.path.splitext(entry)[1] == ".pcd"
    and method in entry
    and "tran" not in entry
]

# Map "<pdb>_<chain>_<ligand>" file stems to compound labels.
# Ligands are ordered by the numeric part of the label, which should make
# the three chemical series show up as groups in the PCD fit.
names = {
    "4ek4_B_1CK": "CS1",
    "4ek5_B_03K": "CS3",
    "4fkg_B_4CK": "CS4",
    "4fki_B_09K": "CS9",
    "4fkj_B_11K": "CS11",
    "3sw4_B_18K": "CS18",
    "3sw7_B_19K": "CS19",
    "4fko_B_20K": "CS20",
    "4fkp_B_LS5": "CS241",
    "4fkq_B_42K": "CS242",
    "4fkr_B_45K": "CS245",
    "4fks_B_46K": "CS246",
    "4fkt_B_48K": "CS248",
    "4fku_D_60K": "CS260",
    "4fkv_B_61K": "CS261",
    "4fkw_B_62K": "CS262",
}


def _label_number(pcd_file):
    """Return the integer part of the "CS<n>" label of a ligand point-cloud file."""
    stem = os.path.splitext(os.path.basename(pcd_file))[0]
    label = names[stem.replace(f"_{method}", "")]
    return int(label.replace("CS", ""))


files.sort(key=_label_number)

print(files)

# Load every point cloud together with the first molecule of the matching SDF
# file (same file name without the "_<method>" suffix)
pcds = []
mols = []
for pcd_file in files:
    pcds.append(o3d.io.read_point_cloud(pcd_file))

    sdf_file = pcd_file.replace(".pcd", ".sdf").replace(f"_{method}", "")
    supplier = Chem.SDMolSupplier(sdf_file)
    mols.append(next(supplier))

['../ligands/CDK2/4ek4_B_1CK_molgrid.pcd', '../ligands/CDK2/4ek5_B_03K_molgrid.pcd', '../ligands/CDK2/4fkg_B_4CK_molgrid.pcd', '../ligands/CDK2/4fki_B_09K_molgrid.pcd', '../ligands/CDK2/4fkj_B_11K_molgrid.pcd', '../ligands/CDK2/3sw4_B_18K_molgrid.pcd', '../ligands/CDK2/3sw7_B_19K_molgrid.pcd', '../ligands/CDK2/4fko_B_20K_molgrid.pcd', '../ligands/CDK2/4fkp_B_LS5_molgrid.pcd', '../ligands/CDK2/4fkq_B_42K_molgrid.pcd', '../ligands/CDK2/4fkr_B_45K_molgrid.pcd', '../ligands/CDK2/4fks_B_46K_molgrid.pcd', '../ligands/CDK2/4fkt_B_48K_molgrid.pcd', '../ligands/CDK2/4fku_D_60K_molgrid.pcd', '../ligands/CDK2/4fkv_B_61K_molgrid.pcd', '../ligands/CDK2/4fkw_B_62K_molgrid.pcd']


## Load Fragments

Load the VEHICLe fragments. Only fragments considered synthetically accessible are included, in order to reduce the computational cost:

In [23]:
# Directory holding the fragment files (".pcd" point clouds and ".sdf" structures).
# This is deliberately not also bound to `path`: `path` is the CDK2 ligand
# directory, and rebinding it here would make the ligand-loading cell list the
# wrong directory (and find no ligands) when re-run after this cell.
fragpath = "../../fragments/VEHICLe-good/"
fragfiles = [
    os.path.join(fragpath, f)
    for f in os.listdir(fragpath)
    if os.path.splitext(f)[-1] == ".pcd"
]

# Quick sanity check of the discovered files
print(fragfiles[:5])

# Parallel lists: fragpcds[k] and fragmols[k] both belong to fragfiles[k]
fragpcds = []
fragmols = []
for ff in tqdm.tqdm(fragfiles):
    pcd = o3d.io.read_point_cloud(ff)
    fragpcds.append(pcd)

    # The molecule is the first record of the SDF file with the same stem
    s = Chem.SDMolSupplier(ff.replace(".pcd", ".sdf"))
    mol = next(s)
    fragmols.append(mol)

['../../fragments/VEHICLe-good/fragment_1046.pcd', '../../fragments/VEHICLe-good/fragment_0.pcd', '../../fragments/VEHICLe-good/fragment_1.pcd', '../../fragments/VEHICLe-good/fragment_1047.pcd', '../../fragments/VEHICLe-good/fragment_10.pcd']


100%|██████████| 5419/5419 [00:05<00:00, 976.74it/s] 


## Align Fragments to CDK2 Inhibitors

In [25]:
# alignments[<compound label>] is a dict of parallel lists, one entry per fragment:
#   "fragment": fragment index parsed from the file name
#   "gfit" / "cfit": `.fitness` of the two results returned by `align`
#                    (NaN when no alignment was found)
#   "mol" / "fragmol": the ligand and fragment molecules
alignments = {}

for i, f in enumerate(files):
    name = os.path.splitext(os.path.basename(f))[0].replace(f"_{method}", "")
    label = names[name]

    results = alignments[label] = defaultdict(list)

    for j, ff in enumerate(tqdm.tqdm(fragfiles, desc="Fragments " + label)):
        fidx = os.path.splitext(os.path.basename(ff))[0].replace("fragment_", "")

        try:
            gfit, cfit = align(fragmols[j], fragpcds[j], pcds[i])
        except RuntimeError:  # No alignment found
            # Store NaN fitness values so the parallel lists stay aligned.
            # (Unpacking `np.nan` into two names raises TypeError, and a float
            # has no `.fitness`, so the fitness values are set directly.)
            # Downstream reductions over these lists should be NaN-aware.
            gfitness = cfitness = np.nan
        else:
            gfitness, cfitness = gfit.fitness, cfit.fitness

        results["fragment"].append(int(fidx))
        results["gfit"].append(gfitness)
        results["cfit"].append(cfitness)
        results["mol"].append(mols[i])
        results["fragmol"].append(fragmols[j])

Fragments CS1: 100%|██████████| 5419/5419 [22:46<00:00,  3.97it/s]
Fragments CS3: 100%|██████████| 5419/5419 [23:01<00:00,  3.92it/s]
Fragments CS4: 100%|██████████| 5419/5419 [22:38<00:00,  3.99it/s]
Fragments CS9: 100%|██████████| 5419/5419 [23:54<00:00,  3.78it/s]
Fragments CS11: 100%|██████████| 5419/5419 [23:28<00:00,  3.85it/s]
Fragments CS18: 100%|██████████| 5419/5419 [24:04<00:00,  3.75it/s]
Fragments CS19: 100%|██████████| 5419/5419 [24:15<00:00,  3.72it/s]
Fragments CS20: 100%|██████████| 5419/5419 [23:47<00:00,  3.80it/s]
Fragments CS241: 100%|██████████| 5419/5419 [20:31<00:00,  4.40it/s]
Fragments CS242: 100%|██████████| 5419/5419 [19:24<00:00,  4.65it/s]
Fragments CS245: 100%|██████████| 5419/5419 [19:45<00:00,  4.57it/s]
Fragments CS246: 100%|██████████| 5419/5419 [19:57<00:00,  4.53it/s]
Fragments CS248: 100%|██████████| 5419/5419 [19:39<00:00,  4.60it/s]
Fragments CS260: 100%|██████████| 5419/5419 [19:14<00:00,  4.69it/s]
Fragments CS261: 100%|██████████| 5419/5419 [2

In [26]:
def showbest(name):
    """
    Print the highest "cfit" fitness recorded for a ligand and show that fragment.

    Parameters
    ----------
    name:
        Key of the ligand in ``alignments`` (e.g. ``"CS1"``).

    Returns
    -------
    Whatever ``show`` returns for the best fragment molecule and the ligand molecule.

    Raises
    ------
    ValueError
        If the ligand has no fitness values or all of them are NaN.
    """
    # NaN marks fragments without an alignment; plain argmax would select a NaN
    # entry, so use the NaN-ignoring variant.
    bestidx = np.nanargmax(alignments[name]["cfit"])
    print(alignments[name]["cfit"][bestidx])
    return show(alignments[name]["fragmol"][bestidx], alignments[name]["mol"][bestidx])

In [27]:
def showworst(name):
    """
    Print the lowest "cfit" fitness recorded for a ligand and show that fragment.

    Parameters
    ----------
    name:
        Key of the ligand in ``alignments`` (e.g. ``"CS1"``).

    Returns
    -------
    Whatever ``show`` returns for the worst fragment molecule and the ligand molecule.

    Raises
    ------
    ValueError
        If the ligand has no fitness values or all of them are NaN.
    """
    # NaN marks fragments without an alignment; plain argmin would select a NaN
    # entry, so use the NaN-ignoring variant.
    worstidx = np.nanargmin(alignments[name]["cfit"])
    print(alignments[name]["cfit"][worstidx])
    return show(
        alignments[name]["fragmol"][worstidx], alignments[name]["mol"][worstidx]
    )

In [28]:
# Print the highest "cfit" fitness for ligand CS1 and display that fragment with the ligand
showbest("CS1")

0.9691358024691358


<py3Dmol.view at 0x7fd90e87b880>

In [29]:
# Print the lowest "cfit" fitness for ligand CS1 and display that fragment with the ligand
showworst("CS1")

0.579250720461095


<py3Dmol.view at 0x7fd90e87bc10>

In [30]:
# Serialise the complete alignment results to "CDK2-VEHICLe.pkl"
with open("CDK2-VEHICLe.pkl", "wb") as pkl_file:
    pickle.dump(alignments, pkl_file)