# Align VEHICLe Fragments to CDK2 Inhibitors

In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before code runs and edits to local code are picked up live.
%load_ext autoreload
%autoreload 2
# Load the jupyter_black extension (Black code formatting for notebook cells).
%load_ext jupyter_black

In [2]:
# Tag embedded in the ligand point-cloud file names ("<ligand>_<method>.pcd").
# It selects which .pcd files are loaded below and is stripped from the file
# name to recover the ligand key and the matching .sdf file.
method = "molgrid"

In [3]:
import open3d as o3d
from rdkit import Chem
import numpy as np

from collections import defaultdict

import tqdm
import os, sys

import pickle

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


In [4]:
# Extend the module search path with the grandparent and parent directories
# (relative to the working directory).
# NOTE(review): presumably this is what makes the local `utils` module
# importable in the next cell — confirm.
sys.path.append("../../")
sys.path.append("../")

In [5]:
from utils import align, show

## Load CDK2 Inhibitors

In [6]:
# Directory holding the CDK2 ligand files: the .pcd point clouds listed below
# and the .sdf structures that are read alongside them.
path = "../ligands/CDK2"

In [7]:
# Collect the ligand point clouds generated with the selected `method`,
# skipping every file whose name contains "tran".
files = [
    os.path.join(path, f)
    for f in os.listdir(path)
    if os.path.splitext(f)[1] == ".pcd" and method in f and "tran" not in f
]

# Order ligands
# This should make the three chemical series pop up in the PCD fit
names = {
    "4ek4_B_1CK": "CS1",
    "4ek5_B_03K": "CS3",
    "4fkg_B_4CK": "CS4",
    "4fki_B_09K": "CS9",
    "4fkj_B_11K": "CS11",
    "3sw4_B_18K": "CS18",
    "3sw7_B_19K": "CS19",
    "4fko_B_20K": "CS20",
    "4fkp_B_LS5": "CS241",
    "4fkq_B_42K": "CS242",
    "4fkr_B_45K": "CS245",
    "4fks_B_46K": "CS246",
    "4fkt_B_48K": "CS248",
    "4fku_D_60K": "CS260",
    "4fkv_B_61K": "CS261",
    "4fkw_B_62K": "CS262",
}


def ligand_key(pcd_path):
    """Return the key into `names` for a ligand point-cloud path.

    The key is the file's base name without its extension and without the
    "_<method>" suffix, e.g. ".../4ek4_B_1CK_molgrid.pcd" -> "4ek4_B_1CK".
    """
    stem = os.path.splitext(os.path.basename(pcd_path))[0]
    return stem.replace(f"_{method}", "")


# Fail early, with a readable message, if the directory contains a matching
# point cloud that has no compound label. Without this check the same KeyError
# would surface from inside the sort key below, without naming the file.
unmapped = [f for f in files if ligand_key(f) not in names]
if unmapped:
    raise KeyError(f"No compound label defined in `names` for: {unmapped}")

# Sort by the numeric part of the compound label (CS1, CS3, ..., CS262).
files.sort(key=lambda f: int(names[ligand_key(f)].replace("CS", "")))

print(files)

# Load every ligand twice: as a point cloud (.pcd) and as a molecule read from
# the .sdf file that shares the same base name minus the "_<method>" suffix.
# NOTE(review): RDKit suppliers yield None for records they cannot parse; a
# None entry would be stored in `mols` unchanged — confirm this is acceptable.
pcds = []
mols = []
for f in files:
    pcd = o3d.io.read_point_cloud(f)
    pcds.append(pcd)

    sdf_file = f.replace(".pcd", ".sdf").replace(f"_{method}", "")
    s = Chem.SDMolSupplier(sdf_file)
    mol = next(s)  # only the first record of the SDF file is used
    mols.append(mol)

['../ligands/CDK2/4ek4_B_1CK_molgrid.pcd', '../ligands/CDK2/4ek5_B_03K_molgrid.pcd', '../ligands/CDK2/4fkg_B_4CK_molgrid.pcd', '../ligands/CDK2/4fki_B_09K_molgrid.pcd', '../ligands/CDK2/4fkj_B_11K_molgrid.pcd', '../ligands/CDK2/3sw4_B_18K_molgrid.pcd', '../ligands/CDK2/3sw7_B_19K_molgrid.pcd', '../ligands/CDK2/4fko_B_20K_molgrid.pcd', '../ligands/CDK2/4fkp_B_LS5_molgrid.pcd', '../ligands/CDK2/4fkq_B_42K_molgrid.pcd', '../ligands/CDK2/4fkr_B_45K_molgrid.pcd', '../ligands/CDK2/4fks_B_46K_molgrid.pcd', '../ligands/CDK2/4fkt_B_48K_molgrid.pcd', '../ligands/CDK2/4fku_D_60K_molgrid.pcd', '../ligands/CDK2/4fkv_B_61K_molgrid.pcd', '../ligands/CDK2/4fkw_B_62K_molgrid.pcd']


## Load Fragments

Load fragments. To reduce the computational cost, only fragments that are considered synthetically accessible are used here:

In [8]:
# Fragment directory. It is deliberately kept in its own variable: the original
# chained assignment also rebound `path`, which silently redirected the ligand
# directory used by the ligand-loading cell whenever that cell was re-run.
fragpath = "../../fragments/VEHICLe-good/"

# All fragment point clouds found in the fragment directory (directory order).
fragfiles = [
    os.path.join(fragpath, f)
    for f in os.listdir(fragpath)
    if os.path.splitext(f)[-1] == ".pcd"
]

print(fragfiles[:5])

# Load every fragment twice: as a point cloud (.pcd) and as a molecule read
# from the .sdf file with the same base name. The two lists are index-aligned
# with `fragfiles`.
# NOTE(review): RDKit suppliers yield None for records they cannot parse; a
# None entry would be stored in `fragmols` unchanged — confirm this is acceptable.
fragpcds = []
fragmols = []
for ff in tqdm.tqdm(fragfiles):
    pcd = o3d.io.read_point_cloud(ff)
    fragpcds.append(pcd)

    s = Chem.SDMolSupplier(ff.replace(".pcd", ".sdf"))
    mol = next(s)  # only the first record of the SDF file is used
    fragmols.append(mol)

['../../fragments/VEHICLe-good/fragment_1046.pcd', '../../fragments/VEHICLe-good/fragment_0.pcd', '../../fragments/VEHICLe-good/fragment_1.pcd', '../../fragments/VEHICLe-good/fragment_1047.pcd', '../../fragments/VEHICLe-good/fragment_10.pcd']


100%|██████████| 5419/5419 [00:06<00:00, 850.59it/s]


## Align Fragments to CDK2 Inhibitors

In [9]:
# Align every fragment to every CDK2 ligand and collect the scores.
#
# `alignments` maps a compound label (e.g. "CS1") to a dict of index-aligned
# lists with the keys "fragment", "gfit", "cfit", "hfit", "mol" and "fragmol".
alignments = {}

for i, f in enumerate(files):
    name = os.path.splitext(os.path.basename(f))[0].replace(f"_{method}", "")
    label = names[name]

    results = alignments[label] = defaultdict(list)

    for j, ff in enumerate(tqdm.tqdm(fragfiles, desc="Fragments " + label)):
        # Numeric fragment index taken from the file name "fragment_<idx>.pcd".
        fidx = os.path.splitext(os.path.basename(ff))[0].replace("fragment_", "")

        try:
            gfit, cfit, hfit = align(fragmols[j], fragpcds[j], pcds[i], hfit=True)
            gfitness, cfitness = gfit.fitness, cfit.fitness
        except RuntimeError:  # No alignment found
            # Record NaN for all three scores. The original fallback
            # (`gfit, cfit, hfit = np.nan`) raised TypeError because a float
            # cannot be unpacked, and `.fitness` would not exist on it either.
            gfitness = cfitness = hfit = np.nan

        results["fragment"].append(int(fidx))
        results["gfit"].append(gfitness)
        results["cfit"].append(cfitness)
        results["hfit"].append(hfit)
        results["mol"].append(mols[i])
        results["fragmol"].append(fragmols[j])

Fragments CS1:  22%|██▏       | 1179/5419 [11:55<48:55,  1.44it/s]  



Fragments CS1:  50%|████▉     | 2699/5419 [28:51<49:13,  1.09s/it]  



Fragments CS1:  50%|█████     | 2730/5419 [29:15<32:06,  1.40it/s]



Fragments CS1:  51%|█████     | 2753/5419 [29:30<27:17,  1.63it/s]



Fragments CS1:  54%|█████▍    | 2948/5419 [31:44<23:48,  1.73it/s]



Fragments CS1:  67%|██████▋   | 3611/5419 [38:40<21:25,  1.41it/s]



Fragments CS1: 100%|██████████| 5419/5419 [57:45<00:00,  1.56it/s]
Fragments CS3:  22%|██▏       | 1179/5419 [12:12<40:04,  1.76it/s] 



Fragments CS3:  50%|████▉     | 2699/5419 [27:40<29:57,  1.51it/s]  



Fragments CS3:  50%|█████     | 2730/5419 [27:59<28:20,  1.58it/s]



Fragments CS3:  51%|█████     | 2753/5419 [28:12<23:11,  1.92it/s]



Fragments CS3:  54%|█████▍    | 2948/5419 [30:15<27:43,  1.49it/s]



Fragments CS3:  67%|██████▋   | 3611/5419 [36:53<18:30,  1.63it/s]



Fragments CS3: 100%|██████████| 5419/5419 [53:30<00:00,  1.69it/s]
Fragments CS4:  22%|██▏       | 1179/5419 [10:42<35:29,  1.99it/s]



Fragments CS4:  50%|████▉     | 2699/5419 [24:39<22:34,  2.01it/s]



Fragments CS4:  50%|█████     | 2730/5419 [24:56<22:53,  1.96it/s]



Fragments CS4:  51%|█████     | 2753/5419 [25:08<24:37,  1.80it/s]



Fragments CS4:  54%|█████▍    | 2948/5419 [26:51<18:03,  2.28it/s]



Fragments CS4:  67%|██████▋   | 3611/5419 [33:01<17:24,  1.73it/s]



Fragments CS4: 100%|██████████| 5419/5419 [49:29<00:00,  1.82it/s]
Fragments CS9:  22%|██▏       | 1179/5419 [10:54<37:46,  1.87it/s] 



Fragments CS9:  50%|████▉     | 2699/5419 [25:13<24:07,  1.88it/s]



Fragments CS9:  50%|█████     | 2730/5419 [25:30<23:55,  1.87it/s]



Fragments CS9:  51%|█████     | 2753/5419 [25:42<21:23,  2.08it/s]



Fragments CS9:  54%|█████▍    | 2948/5419 [27:31<22:12,  1.85it/s]



Fragments CS9:  67%|██████▋   | 3611/5419 [33:39<18:20,  1.64it/s]



Fragments CS9: 100%|██████████| 5419/5419 [50:43<00:00,  1.78it/s]
Fragments CS11:  22%|██▏       | 1179/5419 [11:04<45:10,  1.56it/s] 



Fragments CS11:  50%|████▉     | 2699/5419 [25:14<25:57,  1.75it/s]



Fragments CS11:  50%|█████     | 2730/5419 [25:33<22:13,  2.02it/s]



Fragments CS11:  51%|█████     | 2753/5419 [25:47<30:08,  1.47it/s]



Fragments CS11:  54%|█████▍    | 2948/5419 [27:36<23:25,  1.76it/s]



Fragments CS11:  67%|██████▋   | 3611/5419 [33:49<16:41,  1.80it/s]



Fragments CS11: 100%|██████████| 5419/5419 [50:52<00:00,  1.78it/s]
Fragments CS18:  22%|██▏       | 1179/5419 [10:40<27:28,  2.57it/s]



Fragments CS18:  50%|████▉     | 2699/5419 [24:26<24:11,  1.87it/s]



Fragments CS18:  50%|█████     | 2730/5419 [24:44<26:12,  1.71it/s]



Fragments CS18:  51%|█████     | 2753/5419 [24:57<25:22,  1.75it/s]



Fragments CS18:  54%|█████▍    | 2948/5419 [26:42<26:35,  1.55it/s]



Fragments CS18:  67%|██████▋   | 3611/5419 [32:47<20:18,  1.48it/s]



Fragments CS18: 100%|██████████| 5419/5419 [49:20<00:00,  1.83it/s]
Fragments CS19:  22%|██▏       | 1179/5419 [10:50<39:59,  1.77it/s] 



Fragments CS19:  50%|████▉     | 2699/5419 [24:52<25:08,  1.80it/s]



Fragments CS19:  50%|█████     | 2730/5419 [25:09<21:42,  2.06it/s]



Fragments CS19:  51%|█████     | 2753/5419 [25:22<22:44,  1.95it/s]



Fragments CS19:  54%|█████▍    | 2948/5419 [27:10<22:55,  1.80it/s]



Fragments CS19:  67%|██████▋   | 3611/5419 [33:22<15:54,  1.89it/s]



Fragments CS19: 100%|██████████| 5419/5419 [50:09<00:00,  1.80it/s]
Fragments CS20:  22%|██▏       | 1179/5419 [10:43<38:55,  1.82it/s] 



Fragments CS20:  50%|████▉     | 2699/5419 [24:38<23:09,  1.96it/s]



Fragments CS20:  50%|█████     | 2730/5419 [24:55<27:17,  1.64it/s]



Fragments CS20:  51%|█████     | 2753/5419 [25:07<25:13,  1.76it/s]



Fragments CS20:  54%|█████▍    | 2948/5419 [26:56<26:13,  1.57it/s]



Fragments CS20:  67%|██████▋   | 3611/5419 [32:57<13:04,  2.30it/s]



Fragments CS20: 100%|██████████| 5419/5419 [49:15<00:00,  1.83it/s]
Fragments CS241:  22%|██▏       | 1179/5419 [10:40<31:01,  2.28it/s]



Fragments CS241:  50%|████▉     | 2699/5419 [24:18<23:29,  1.93it/s]



Fragments CS241:  50%|█████     | 2730/5419 [24:35<28:16,  1.59it/s]



Fragments CS241:  51%|█████     | 2753/5419 [24:46<22:54,  1.94it/s]



Fragments CS241:  54%|█████▍    | 2948/5419 [26:31<19:29,  2.11it/s]



Fragments CS241:  67%|██████▋   | 3611/5419 [32:31<15:57,  1.89it/s]



Fragments CS241: 100%|██████████| 5419/5419 [49:06<00:00,  1.84it/s]
Fragments CS242:  22%|██▏       | 1179/5419 [10:41<42:32,  1.66it/s]



Fragments CS242:  50%|████▉     | 2699/5419 [24:20<27:35,  1.64it/s]



Fragments CS242:  50%|█████     | 2730/5419 [24:36<18:06,  2.47it/s]



Fragments CS242:  51%|█████     | 2753/5419 [24:49<23:07,  1.92it/s]



Fragments CS242:  54%|█████▍    | 2948/5419 [26:35<24:26,  1.68it/s]



Fragments CS242:  67%|██████▋   | 3611/5419 [32:29<12:02,  2.50it/s]



Fragments CS242: 100%|██████████| 5419/5419 [47:34<00:00,  1.90it/s]
Fragments CS245:  22%|██▏       | 1179/5419 [04:10<14:12,  4.97it/s]



Fragments CS245:  50%|████▉     | 2699/5419 [09:30<08:51,  5.11it/s]



Fragments CS245:  50%|█████     | 2731/5419 [09:37<08:54,  5.03it/s]



Fragments CS245:  51%|█████     | 2753/5419 [09:41<08:54,  4.99it/s]



Fragments CS245:  54%|█████▍    | 2948/5419 [10:23<08:12,  5.02it/s]



Fragments CS245:  67%|██████▋   | 3611/5419 [12:42<07:05,  4.24it/s]



Fragments CS245: 100%|██████████| 5419/5419 [19:09<00:00,  4.71it/s]
Fragments CS246:  22%|██▏       | 1179/5419 [04:11<14:46,  4.79it/s]



Fragments CS246:  50%|████▉     | 2700/5419 [09:38<09:16,  4.88it/s]



Fragments CS246:  50%|█████     | 2731/5419 [09:45<08:36,  5.21it/s]



Fragments CS246:  51%|█████     | 2754/5419 [09:49<08:50,  5.02it/s]



Fragments CS246:  54%|█████▍    | 2948/5419 [10:31<08:17,  4.97it/s]



Fragments CS246:  67%|██████▋   | 3611/5419 [12:53<07:39,  3.93it/s]



Fragments CS246: 100%|██████████| 5419/5419 [19:24<00:00,  4.65it/s]
Fragments CS248:  22%|██▏       | 1180/5419 [04:11<15:15,  4.63it/s]



Fragments CS248:  50%|████▉     | 2700/5419 [09:34<09:27,  4.79it/s]



Fragments CS248:  50%|█████     | 2731/5419 [09:40<08:37,  5.20it/s]



Fragments CS248:  51%|█████     | 2753/5419 [09:45<08:51,  5.02it/s]



Fragments CS248:  54%|█████▍    | 2948/5419 [10:25<08:35,  4.80it/s]



Fragments CS248:  67%|██████▋   | 3611/5419 [12:47<06:50,  4.40it/s]



Fragments CS248: 100%|██████████| 5419/5419 [19:12<00:00,  4.70it/s]
Fragments CS260:  22%|██▏       | 1179/5419 [04:04<13:59,  5.05it/s]



Fragments CS260:  50%|████▉     | 2700/5419 [09:17<08:45,  5.18it/s]



Fragments CS260:  50%|█████     | 2731/5419 [09:24<08:58,  4.99it/s]



Fragments CS260:  51%|█████     | 2754/5419 [09:28<08:05,  5.49it/s]



Fragments CS260:  54%|█████▍    | 2948/5419 [10:08<08:06,  5.08it/s]



Fragments CS260:  67%|██████▋   | 3612/5419 [12:23<05:53,  5.11it/s]



Fragments CS260: 100%|██████████| 5419/5419 [18:39<00:00,  4.84it/s]
Fragments CS261:  22%|██▏       | 1179/5419 [04:15<16:10,  4.37it/s]



Fragments CS261:  50%|████▉     | 2700/5419 [09:44<09:23,  4.83it/s]



Fragments CS261:  50%|█████     | 2731/5419 [09:51<09:20,  4.80it/s]



Fragments CS261:  51%|█████     | 2754/5419 [09:56<08:45,  5.07it/s]



Fragments CS261:  54%|█████▍    | 2948/5419 [10:37<09:18,  4.42it/s]



Fragments CS261:  67%|██████▋   | 3611/5419 [13:00<06:02,  4.99it/s]



Fragments CS261: 100%|██████████| 5419/5419 [19:33<00:00,  4.62it/s]
Fragments CS262:  22%|██▏       | 1179/5419 [04:10<14:38,  4.83it/s]



Fragments CS262:  50%|████▉     | 2700/5419 [09:35<09:46,  4.64it/s]



Fragments CS262:  50%|█████     | 2730/5419 [09:41<09:05,  4.93it/s]



Fragments CS262:  51%|█████     | 2754/5419 [09:47<08:30,  5.22it/s]



Fragments CS262:  54%|█████▍    | 2948/5419 [10:28<08:00,  5.14it/s]



Fragments CS262:  67%|██████▋   | 3611/5419 [12:48<05:51,  5.14it/s]



Fragments CS262: 100%|██████████| 5419/5419 [19:15<00:00,  4.69it/s]


In [10]:
# Serialize the collected alignment results to a pickle file in the current
# working directory.
with open("CDK2-VEHICLe.pkl", "wb") as pkl_file:
    pickle.dump(alignments, pkl_file)