# Align Molecules

In this notebook we explore whether we can use SENSAAS with point clouds from `libmolgrid`-generated densities in order to align molecules (as opposed to the molecular surfaces SENSAAS computes).

In [1]:
# Enable IPython's autoreload extension; mode 2 reloads all modules before
# each cell is executed, so edits to local modules are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
# `os` is imported here because this cell runs before the notebook's main
# imports cell; without it a fresh "Restart & Run All" stops with a NameError.
import os

# Folder that receives the SDF files of failed alignments.
# (Deliberately not called `dir`, which would shadow the built-in.)
failed_dir = 'failed/'

# Start every run from an empty folder: create it on first use, otherwise
# remove whatever a previous run left behind. Sub-directories are skipped
# because os.remove cannot delete them.
os.makedirs(failed_dir, exist_ok=True)
for f in os.listdir(failed_dir):
    stale = os.path.join(failed_dir, f)
    if os.path.isdir(stale):
        continue
    os.remove(stale)
 

### Libraries

In [3]:
import sys

# Add the two parent directories to the module search path
# (presumably so that local modules such as `utils` can be imported below).
sys.path.extend(["../../", "../"])

In [4]:
import open3d as o3d
import pandas as pd
from collections import defaultdict

import re, os
from rdkit import Chem

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


INFO - 2022-03-15 16:16:05,974 - __init__ - Enabling RDKit 2021.03.3 jupyter extensions


In [5]:
from utils import align_and_show

Store RMSD before and after alignment (for both BRD4 and CDK2)

In [6]:
# Column name -> list of values; the alignment loops below append one entry
# per ligand to each column, and the dict is later turned into a DataFrame.
rmsds = defaultdict(list)

## BRD4 Inhibitors

In [7]:
# Folder with the BRD4 ligand files: `.pcd` point clouds, each with an `.sdf`
# file of the same name next to it.
path = "../ligands/BRD4"

Load the original BRD4 inhibitors together with their point cloud representation (pre-computed):

In [8]:
# Collect the point clouds of the original ligands: `.pcd` files whose name
# does not mark them as a fragment, a conformer or a translated copy.
files = []
for fname in os.listdir(path):
    stem, ext = os.path.splitext(fname)
    if ext == ".pcd" and not any(tag in stem for tag in ("frag", "conf", "tran")):
        files.append(os.path.join(path, fname))

# Sort BRD4 ligand files by number.
# Raw string: "\d" in a plain literal is an invalid escape sequence.
r = re.compile(r"\d{1,2}")
files.sort(key=lambda f: int(r.search(os.path.basename(f)).group()))

print(files)

# pcds[i] and mols[i] describe the same ligand (point cloud / molecule).
pcds = []
mols = []
for f in files:
    pcd = o3d.io.read_point_cloud(f)
    pcds.append(pcd)

    # Swap only the extension; a plain str.replace would also rewrite a
    # ".pcd" that happens to appear elsewhere in the path.
    sdf_file = os.path.splitext(f)[0] + ".sdf"
    s = Chem.SDMolSupplier(sdf_file)
    mol = next(s)  # first record of the SDF file
    if mol is None:
        # RDKit yields None for a record it cannot parse; fail here with a
        # clear message instead of somewhere inside the alignment.
        raise ValueError(f"Could not read a molecule from {sdf_file}")
    mols.append(mol)

['../ligands/BRD4/ligand-1.pcd', '../ligands/BRD4/ligand-2.pcd', '../ligands/BRD4/ligand-3.pcd', '../ligands/BRD4/ligand-4.pcd', '../ligands/BRD4/ligand-5.pcd', '../ligands/BRD4/ligand-6.pcd', '../ligands/BRD4/ligand-7.pcd', '../ligands/BRD4/ligand-8.pcd', '../ligands/BRD4/ligand-9.pcd', '../ligands/BRD4/ligand-10.pcd']


Load translated and rotated BRD4 inhibitors with their point-cloud representation (pre-computed):

In [9]:
# Collect the point clouds of the translated/rotated ligands: `.pcd` files
# whose name (without extension) ends in "tran".
files = []
for fname in os.listdir(path):
    stem, ext = os.path.splitext(fname)
    if ext == ".pcd" and stem.endswith("tran"):
        files.append(os.path.join(path, fname))

# Sort BRD4 ligand files by number.
# Raw string: "\d" in a plain literal is an invalid escape sequence.
r = re.compile(r"\d{1,2}")
files.sort(key=lambda f: int(r.search(os.path.basename(f)).group()))

print(files)

# tpcds[i] and tmols[i] describe the same translated ligand.
tpcds = []
tmols = []
for f in files:
    pcd = o3d.io.read_point_cloud(f)
    tpcds.append(pcd)

    # Swap only the extension; a plain str.replace would also rewrite a
    # ".pcd" that happens to appear elsewhere in the path.
    sdf_file = os.path.splitext(f)[0] + ".sdf"
    s = Chem.SDMolSupplier(sdf_file)
    mol = next(s)  # first record of the SDF file
    if mol is None:
        # RDKit yields None for a record it cannot parse; fail here with a
        # clear message instead of somewhere inside the alignment.
        raise ValueError(f"Could not read a molecule from {sdf_file}")
    tmols.append(mol)

['../ligands/BRD4/ligand-1_tran.pcd', '../ligands/BRD4/ligand-2_tran.pcd', '../ligands/BRD4/ligand-3_tran.pcd', '../ligands/BRD4/ligand-4_tran.pcd', '../ligands/BRD4/ligand-5_tran.pcd', '../ligands/BRD4/ligand-6_tran.pcd', '../ligands/BRD4/ligand-7_tran.pcd', '../ligands/BRD4/ligand-8_tran.pcd', '../ligands/BRD4/ligand-9_tran.pcd', '../ligands/BRD4/ligand-10_tran.pcd']


In [10]:
# Originals and translated copies are paired purely by position, so all lists
# must have the same length; otherwise ligands would be silently skipped or
# the loop would stop half-way with an IndexError.
assert len(mols) == len(pcds) == len(tmols) == len(tpcds) == len(files), (
    "original and translated BRD4 ligand lists differ in length"
)

# Align every translated ligand and record the RMSD values.
# `files` holds the translated file names at this point.
for i, f in enumerate(files):
    # First two return values are taken as the RMSD before and after the
    # alignment; the remaining two are not needed here.
    rmsd_i, rmsd_f, _, _ = align_and_show(mols[i], pcds[i], tmols[i], tpcds[i])
    rmsds["system"].append("BRD4")
    # Ligand name = file name without extension and without the "_tran" tag
    rmsds["lig"].append(os.path.splitext(os.path.basename(f))[0].replace("_tran", ""))
    rmsds["RMSDi"].append(rmsd_i)
    rmsds["RMSDf"].append(rmsd_f)
    # Position in mols/pcds/tmols/tpcds, used to look the ligand up again
    rmsds["idx"].append(i)

### Failures

In [11]:
# Keep only the BRD4 rows, mirroring what the CDK2 section does for its own
# system. On a clean top-to-bottom run this changes nothing, but if the cell
# is re-run after the CDK2 results were added, their `idx` values would
# otherwise be used to index the BRD4 lists.
df = pd.DataFrame(rmsds)
df = df[df.system == "BRD4"]

In [12]:
# Ligands counted as failed alignments. The cut-off is 0.1, the same value
# the failure-export loops use, so this table lists exactly the ligands that
# get written to `failed/`.
df[df.RMSDf > 0.1]

Unnamed: 0,system,lig,RMSDi,RMSDf,idx


In [13]:
# For every ligand whose final RMSD is above 0.1: repeat the alignment, write
# the returned molecule to `failed/` and display the returned view.
failed = df.loc[df.RMSDf > 0.1, ["idx", "lig"]]

for i, lig in failed.itertuples(index=False, name=None):
    _, _, view, aligned = align_and_show(mols[i], pcds[i], tmols[i], tpcds[i])

    out_file = f"failed/{lig}_alignfail.sdf"
    with Chem.SDWriter(out_file) as writer:
        writer.write(aligned, confId=1)

    display(view)

## CDK2 Inhibitors

In [14]:
# Folder with the CDK2 ligand files: `.pcd` point clouds, each with an `.sdf`
# file of the same name next to it.
path = "../ligands/CDK2"

In [15]:
# Point clouds of the original ligands only: `.pcd` files that are neither
# fragments, conformers nor translated copies.
files = []
for entry in os.listdir(path):
    stem, ext = os.path.splitext(entry)
    if ext != ".pcd":
        continue
    if "frag" in stem or "conf" in stem or "tran" in stem:
        continue
    files.append(os.path.join(path, entry))

# Order ligands
# This should make the three chemical series pop-up in the PCD fit
names = {
    "4ek4_B_1CK": "CS1",
    "4ek5_B_03K": "CS3",
    "4fkg_B_4CK": "CS4",
    "4fki_B_09K": "CS9",
    "4fkj_B_11K": "CS11",
    "3sw4_B_18K": "CS18",
    "3sw7_B_19K": "CS19",
    "4fko_B_20K": "CS20",
    "4fkp_B_LS5": "CS241",
    "4fkq_B_42K": "CS242",
    "4fkr_B_45K": "CS245",
    "4fks_B_46K": "CS246",
    "4fkt_B_48K": "CS248",
    "4fku_D_60K": "CS260",
    "4fkv_B_61K": "CS261",
    "4fkw_B_62K": "CS262",
}


def _cs_number(pcd_file):
    """Return the numeric part of the "CS<n>" label assigned to a ligand file."""
    stem = os.path.splitext(os.path.basename(pcd_file))[0]
    return int(names[stem].replace("CS", ""))


files = sorted(files, key=_cs_number)

print(files)

# pcds[i] and mols[i] describe the same ligand (point cloud / molecule).
pcds, mols = [], []
for pcd_file in files:
    pcds.append(o3d.io.read_point_cloud(pcd_file))

    # The molecule is the first record of the SDF file with the same name
    supplier = Chem.SDMolSupplier(pcd_file.replace(".pcd", ".sdf"))
    mols.append(next(supplier))

['../ligands/CDK2/4ek4_B_1CK.pcd', '../ligands/CDK2/4ek5_B_03K.pcd', '../ligands/CDK2/4fkg_B_4CK.pcd', '../ligands/CDK2/4fki_B_09K.pcd', '../ligands/CDK2/4fkj_B_11K.pcd', '../ligands/CDK2/3sw4_B_18K.pcd', '../ligands/CDK2/3sw7_B_19K.pcd', '../ligands/CDK2/4fko_B_20K.pcd', '../ligands/CDK2/4fkp_B_LS5.pcd', '../ligands/CDK2/4fkq_B_42K.pcd', '../ligands/CDK2/4fkr_B_45K.pcd', '../ligands/CDK2/4fks_B_46K.pcd', '../ligands/CDK2/4fkt_B_48K.pcd', '../ligands/CDK2/4fku_D_60K.pcd', '../ligands/CDK2/4fkv_B_61K.pcd', '../ligands/CDK2/4fkw_B_62K.pcd']


In [16]:
# Point clouds of the translated ligands: `.pcd` files whose name (without
# extension) ends in "tran".
files = []
for entry in os.listdir(path):
    stem, ext = os.path.splitext(entry)
    if ext == ".pcd" and stem.endswith("tran"):
        files.append(os.path.join(path, entry))

# Order ligands
# This should make the three chemical series pop-up in the PCD fit
names = {
    "4ek4_B_1CK": "CS1",
    "4ek5_B_03K": "CS3",
    "4fkg_B_4CK": "CS4",
    "4fki_B_09K": "CS9",
    "4fkj_B_11K": "CS11",
    "3sw4_B_18K": "CS18",
    "3sw7_B_19K": "CS19",
    "4fko_B_20K": "CS20",
    "4fkp_B_LS5": "CS241",
    "4fkq_B_42K": "CS242",
    "4fkr_B_45K": "CS245",
    "4fks_B_46K": "CS246",
    "4fkt_B_48K": "CS248",
    "4fku_D_60K": "CS260",
    "4fkv_B_61K": "CS261",
    "4fkw_B_62K": "CS262",
}


def _cs_number_tran(pcd_file):
    """Return the numeric part of the "CS<n>" label of a translated ligand file."""
    stem = os.path.splitext(os.path.basename(pcd_file))[0]
    return int(names[stem.replace("_tran", "")].replace("CS", ""))


files = sorted(files, key=_cs_number_tran)

print(files)

# tpcds[i] and tmols[i] describe the same translated ligand.
tpcds, tmols = [], []
for pcd_file in files:
    tpcds.append(o3d.io.read_point_cloud(pcd_file))

    # The molecule is the first record of the SDF file with the same name
    supplier = Chem.SDMolSupplier(pcd_file.replace(".pcd", ".sdf"))
    tmols.append(next(supplier))

['../ligands/CDK2/4ek4_B_1CK_tran.pcd', '../ligands/CDK2/4ek5_B_03K_tran.pcd', '../ligands/CDK2/4fkg_B_4CK_tran.pcd', '../ligands/CDK2/4fki_B_09K_tran.pcd', '../ligands/CDK2/4fkj_B_11K_tran.pcd', '../ligands/CDK2/3sw4_B_18K_tran.pcd', '../ligands/CDK2/3sw7_B_19K_tran.pcd', '../ligands/CDK2/4fko_B_20K_tran.pcd', '../ligands/CDK2/4fkp_B_LS5_tran.pcd', '../ligands/CDK2/4fkq_B_42K_tran.pcd', '../ligands/CDK2/4fkr_B_45K_tran.pcd', '../ligands/CDK2/4fks_B_46K_tran.pcd', '../ligands/CDK2/4fkt_B_48K_tran.pcd', '../ligands/CDK2/4fku_D_60K_tran.pcd', '../ligands/CDK2/4fkv_B_61K_tran.pcd', '../ligands/CDK2/4fkw_B_62K_tran.pcd']


In [17]:
# Originals and translated copies are paired purely by position, so all lists
# must have the same length; otherwise ligands would be silently skipped or
# the loop would stop half-way with an IndexError.
assert len(mols) == len(pcds) == len(tmols) == len(tpcds) == len(files), (
    "original and translated CDK2 ligand lists differ in length"
)

# Align every translated ligand and record the RMSD values.
# `files` holds the translated file names at this point.
for i, f in enumerate(files):
    # First two return values are taken as the RMSD before and after the
    # alignment; the remaining two are not needed here.
    rmsd_i, rmsd_f, _, _ = align_and_show(mols[i], pcds[i], tmols[i], tpcds[i])
    rmsds["system"].append("CDK2")
    # Ligands are reported under their "CS<n>" label rather than the file name
    rmsds["lig"].append(names[os.path.splitext(os.path.basename(f))[0].replace("_tran", "")])
    rmsds["RMSDi"].append(rmsd_i)
    rmsds["RMSDf"].append(rmsd_f)
    # Position in mols/pcds/tmols/tpcds, used to look the ligand up again
    rmsds["idx"].append(i)

### Failures

In [18]:
# Rebuild the results table and keep only the CDK2 rows, whose `idx` values
# refer to the CDK2 lists loaded above.
df = pd.DataFrame(rmsds).loc[lambda table: table.system == "CDK2"]

In [19]:
# For every ligand whose final RMSD is above 0.1: repeat the alignment, write
# the returned molecule to `failed/` and display the returned view.
failed = df.loc[df.RMSDf > 0.1, ["idx", "lig"]]

for i, lig in failed.itertuples(index=False, name=None):
    _, _, view, aligned = align_and_show(mols[i], pcds[i], tmols[i], tpcds[i])

    out_file = f"failed/{lig}_alignfail.sdf"
    with Chem.SDWriter(out_file) as writer:
        writer.write(aligned, confId=1)

    display(view)

<py3Dmol.view at 0x7ff51acf92d0>

## Show Results

In [20]:
# Rebuild the table from everything collected in `rmsds` (all systems, no
# filtering) and display it.
df = pd.DataFrame(rmsds)
df

Unnamed: 0,system,lig,RMSDi,RMSDf,idx
0,BRD4,ligand-1,14.533324,0.091224,0
1,BRD4,ligand-2,3.482161,0.06069,1
2,BRD4,ligand-3,16.627933,0.023853,2
3,BRD4,ligand-4,16.260454,0.08914,3
4,BRD4,ligand-5,7.331523,0.034615,4
5,BRD4,ligand-6,19.503197,0.035842,5
6,BRD4,ligand-7,21.968921,0.020045,6
7,BRD4,ligand-8,3.888344,0.035135,7
8,BRD4,ligand-9,7.502592,0.041128,8
9,BRD4,ligand-10,23.961168,0.029155,9


In [21]:
# Save the table as CSV without the index column; floats are written with
# five decimal places.
df.to_csv("rmsds.csv", index=False, float_format="%.5f")