# Align Molecules

In this notebook we explore whether SENSAAS can align molecules using point clouds derived from `libmolgrid`-generated densities (as opposed to the molecular surfaces SENSAAS computes itself).

In [1]:
# Load IPython's autoreload extension in mode 2, which re-imports modules
# before code is executed, so edits to local helper modules are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Load the jupyter_black extension (presumably formats code cells with black).
%load_ext jupyter_black

In [2]:
# Coloring method of the pre-computed point clouds (PCDs) to work with.
# The value selects which "*_<method>.pcd" files are loaded and is part of the
# name of the failure directory and of the CSV written at the end.
#   sensaas: sensaas coloring method for PCDs
#   molgrid: libmolgrid coloring method for PCDs
method = "molgrid"

In [3]:
# `os` is imported here because this cell runs before the notebook's main
# import cell; without it a fresh "Restart & Run All" stops with a NameError.
import os

# Directory collecting the SDF files written for failed alignments of the
# selected method.
# NOTE: `dir` shadows the builtin of the same name; the name is kept so that
# any code relying on this variable keeps working.
dir = f"failed-{method}/"

# Create the directory on first use so that os.listdir does not raise
# FileNotFoundError on a clean checkout.
os.makedirs(dir, exist_ok=True)

# Remove results left over from a previous run. Only regular files are
# deleted because os.remove cannot delete directories.
for f in os.listdir(dir):
    stale = os.path.join(dir, f)
    if os.path.isfile(stale):
        os.remove(stale)

### Libraries

In [4]:
import sys

# Extend the module search path with the grandparent and parent directories
# (in that order) so that modules stored there can be imported by later cells.
sys.path.extend(["../../", "../"])

In [5]:
import open3d as o3d
import pandas as pd
from collections import defaultdict

import re, os
from rdkit import Chem

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


In [6]:
from utils import align_and_show

Store RMSD before and after alignment (for both BRD4 and CDK2)

In [7]:
# Column-wise accumulator for the alignment results: each key maps to a list
# and missing keys start as empty lists. The alignment loops below append to
# the keys "system", "lig", "RMSDi", "RMSDf" and "idx".
rmsds = defaultdict(list)

## BRD4 Inhibitors

In [8]:
# Directory scanned below for the BRD4 ligand point clouds (.pcd); the SDF
# file names are derived from the PCD file names.
path = "../ligands/BRD4"

Load original BRD4 inhibitors together with their point cloud representation (pre-computed):

In [9]:
# Collect the point clouds of the original (untransformed) BRD4 ligands that
# belong to the selected coloring method.
files = []
for f in os.listdir(path):
    ext = os.path.splitext(f)[1]
    if ext == ".pcd" and method in f and "tran" not in f:
        files.append(os.path.join(path, f))

# Sort BRD4 ligand files by number (first run of one or two digits in the
# file name). The pattern is a raw string: "\d" in a plain string literal is
# an invalid escape sequence and triggers a warning on recent Python versions.
r = re.compile(r"\d{1,2}")
files.sort(key=lambda f: int(r.search(os.path.basename(f)).group()))

print(files)

# Load every point cloud together with the molecule it belongs to; both lists
# follow the order of `files`.
pcds = []
mols = []
for f in files:
    pcd = o3d.io.read_point_cloud(f)
    pcds.append(pcd)

    # The SDF path is the PCD path with the extension swapped and the
    # "_<method>" suffix removed; only the first record is used.
    s = Chem.SDMolSupplier(f.replace(".pcd", ".sdf").replace(f"_{method}", ""))
    mol = next(s)
    mols.append(mol)

['../ligands/BRD4/ligand-1_molgrid.pcd', '../ligands/BRD4/ligand-2_molgrid.pcd', '../ligands/BRD4/ligand-3_molgrid.pcd', '../ligands/BRD4/ligand-4_molgrid.pcd', '../ligands/BRD4/ligand-5_molgrid.pcd', '../ligands/BRD4/ligand-6_molgrid.pcd', '../ligands/BRD4/ligand-7_molgrid.pcd', '../ligands/BRD4/ligand-8_molgrid.pcd', '../ligands/BRD4/ligand-9_molgrid.pcd', '../ligands/BRD4/ligand-10_molgrid.pcd']


Load translated and rotated BRD4 inhibitors with their point-cloud representation (pre-computed):

In [10]:
# Collect the point clouds of the translated and rotated BRD4 ligands, i.e.
# files whose stem contains "tran_<method>".
files = []
for f in os.listdir(path):
    fname, ext = os.path.splitext(f)
    if ext == ".pcd" and f"tran_{method}" in fname:
        files.append(os.path.join(path, f))

# Sort BRD4 ligand files by number (first run of one or two digits in the
# file name). The pattern is a raw string: "\d" in a plain string literal is
# an invalid escape sequence and triggers a warning on recent Python versions.
r = re.compile(r"\d{1,2}")
files.sort(key=lambda f: int(r.search(os.path.basename(f)).group()))

print(files)

# Load every transformed point cloud together with its molecule; both lists
# follow the order of `files`.
tpcds = []
tmols = []
for f in files:
    pcd = o3d.io.read_point_cloud(f)
    tpcds.append(pcd)

    # The SDF path is the PCD path with the extension swapped and the
    # "_<method>" suffix removed; only the first record is used.
    s = Chem.SDMolSupplier(f.replace(".pcd", ".sdf").replace(f"_{method}", ""))
    mol = next(s)
    tmols.append(mol)

['../ligands/BRD4/ligand-1_tran_molgrid.pcd', '../ligands/BRD4/ligand-2_tran_molgrid.pcd', '../ligands/BRD4/ligand-3_tran_molgrid.pcd', '../ligands/BRD4/ligand-4_tran_molgrid.pcd', '../ligands/BRD4/ligand-5_tran_molgrid.pcd', '../ligands/BRD4/ligand-6_tran_molgrid.pcd', '../ligands/BRD4/ligand-7_tran_molgrid.pcd', '../ligands/BRD4/ligand-8_tran_molgrid.pcd', '../ligands/BRD4/ligand-9_tran_molgrid.pcd', '../ligands/BRD4/ligand-10_tran_molgrid.pcd']


In [11]:
# Run the alignment for every BRD4 ligand (original vs. transformed copy) and
# record the two RMSD values returned by align_and_show.
for lig_idx, tran_file in enumerate(files):
    rmsd_before, rmsd_after, _, _ = align_and_show(
        mols[lig_idx], pcds[lig_idx], tmols[lig_idx], tpcds[lig_idx]
    )

    # Ligand label: file stem without "_tran" (the method suffix is kept).
    stem = os.path.splitext(os.path.basename(tran_file))[0]

    record = {
        "system": "BRD4",
        "lig": stem.replace("_tran", ""),
        "RMSDi": rmsd_before,
        "RMSDf": rmsd_after,
        "idx": lig_idx,
    }
    for column, value in record.items():
        rmsds[column].append(value)

### Failures

In [12]:
# Snapshot of the results accumulated so far as a table (one row per ligand).
df = pd.DataFrame(rmsds)

In [13]:
# Show the ligands whose RMSD after alignment is above 2.0.
df.loc[df["RMSDf"] > 2.0]

Unnamed: 0,system,lig,RMSDi,RMSDf,idx


In [14]:
# Re-run the alignment for every ligand in `df` whose final RMSD exceeds 0.1,
# save the molecule returned by align_and_show and display the returned view.
for _, (i, lig) in df[df.RMSDf > 0.1][["idx", "lig"]].iterrows():
    _, _, p, tmol = align_and_show(mols[i], pcds[i], tmols[i], tpcds[i])

    # Write into the method-specific directory "failed-<method>/", the same
    # one that is emptied at the top of the notebook and used for CDK2;
    # the previous plain "failed/" directory was never created or cleaned.
    with Chem.SDWriter(f"failed-{method}/{lig}_alignfail.sdf") as w:
        # NOTE(review): confId=1 presumably selects the aligned conformer
        # produced by align_and_show - confirm against utils.
        w.write(tmol, confId=1)

    display(p)

## CDK2 Inhibitors

In [15]:
# Directory scanned below for the CDK2 ligand point clouds (.pcd); the SDF
# file names are derived from the PCD file names.
path = "../ligands/CDK2"

In [16]:
# Point clouds of the original (untransformed) CDK2 ligands for the selected
# coloring method.
files = [
    os.path.join(path, entry)
    for entry in os.listdir(path)
    if os.path.splitext(entry)[1] == ".pcd" and method in entry and "tran" not in entry
]

# Order ligands
# This should make the three chemical series pop-up in the PCD fit
names = {
    "4ek4_B_1CK": "CS1",
    "4ek5_B_03K": "CS3",
    "4fkg_B_4CK": "CS4",
    "4fki_B_09K": "CS9",
    "4fkj_B_11K": "CS11",
    "3sw4_B_18K": "CS18",
    "3sw7_B_19K": "CS19",
    "4fko_B_20K": "CS20",
    "4fkp_B_LS5": "CS241",
    "4fkq_B_42K": "CS242",
    "4fkr_B_45K": "CS245",
    "4fks_B_46K": "CS246",
    "4fkt_B_48K": "CS248",
    "4fku_D_60K": "CS260",
    "4fkv_B_61K": "CS261",
    "4fkw_B_62K": "CS262",
}


def _cs_number(pcd_file):
    """Return the integer part of the "CS<n>" label of an original PCD file."""
    stem = os.path.splitext(os.path.basename(pcd_file))[0]
    label = names[stem.replace(f"_{method}", "")]
    return int(label.replace("CS", ""))


files.sort(key=_cs_number)

print(files)

# Read each point cloud and the first molecule of the matching SDF file; the
# SDF path is the PCD path with the extension swapped and "_<method>" removed.
pcds, mols = [], []
for pcd_file in files:
    pcds.append(o3d.io.read_point_cloud(pcd_file))

    sdf_file = pcd_file.replace(".pcd", ".sdf").replace(f"_{method}", "")
    supplier = Chem.SDMolSupplier(sdf_file)
    mols.append(next(supplier))

['../ligands/CDK2/4ek4_B_1CK_molgrid.pcd', '../ligands/CDK2/4ek5_B_03K_molgrid.pcd', '../ligands/CDK2/4fkg_B_4CK_molgrid.pcd', '../ligands/CDK2/4fki_B_09K_molgrid.pcd', '../ligands/CDK2/4fkj_B_11K_molgrid.pcd', '../ligands/CDK2/3sw4_B_18K_molgrid.pcd', '../ligands/CDK2/3sw7_B_19K_molgrid.pcd', '../ligands/CDK2/4fko_B_20K_molgrid.pcd', '../ligands/CDK2/4fkp_B_LS5_molgrid.pcd', '../ligands/CDK2/4fkq_B_42K_molgrid.pcd', '../ligands/CDK2/4fkr_B_45K_molgrid.pcd', '../ligands/CDK2/4fks_B_46K_molgrid.pcd', '../ligands/CDK2/4fkt_B_48K_molgrid.pcd', '../ligands/CDK2/4fku_D_60K_molgrid.pcd', '../ligands/CDK2/4fkv_B_61K_molgrid.pcd', '../ligands/CDK2/4fkw_B_62K_molgrid.pcd']


In [17]:
# Point clouds of the translated and rotated CDK2 ligands, i.e. files whose
# stem contains "tran_<method>".
files = [
    os.path.join(path, entry)
    for entry in os.listdir(path)
    if os.path.splitext(entry)[1] == ".pcd"
    and f"tran_{method}" in os.path.splitext(entry)[0]
]

# Order ligands
# This should make the three chemical series pop-up in the PCD fit
names = {
    "4ek4_B_1CK": "CS1",
    "4ek5_B_03K": "CS3",
    "4fkg_B_4CK": "CS4",
    "4fki_B_09K": "CS9",
    "4fkj_B_11K": "CS11",
    "3sw4_B_18K": "CS18",
    "3sw7_B_19K": "CS19",
    "4fko_B_20K": "CS20",
    "4fkp_B_LS5": "CS241",
    "4fkq_B_42K": "CS242",
    "4fkr_B_45K": "CS245",
    "4fks_B_46K": "CS246",
    "4fkt_B_48K": "CS248",
    "4fku_D_60K": "CS260",
    "4fkv_B_61K": "CS261",
    "4fkw_B_62K": "CS262",
}


def _tran_cs_number(pcd_file):
    """Return the integer part of the "CS<n>" label of a transformed PCD file."""
    stem = os.path.splitext(os.path.basename(pcd_file))[0]
    label = names[stem.replace(f"_tran_{method}", "")]
    return int(label.replace("CS", ""))


files.sort(key=_tran_cs_number)

print(files)

# Read each point cloud and the first molecule of the matching SDF file; the
# SDF path is the PCD path with the extension swapped and "_<method>" removed.
tpcds, tmols = [], []
for pcd_file in files:
    tpcds.append(o3d.io.read_point_cloud(pcd_file))

    sdf_file = pcd_file.replace(".pcd", ".sdf").replace(f"_{method}", "")
    supplier = Chem.SDMolSupplier(sdf_file)
    tmols.append(next(supplier))

['../ligands/CDK2/4ek4_B_1CK_tran_molgrid.pcd', '../ligands/CDK2/4ek5_B_03K_tran_molgrid.pcd', '../ligands/CDK2/4fkg_B_4CK_tran_molgrid.pcd', '../ligands/CDK2/4fki_B_09K_tran_molgrid.pcd', '../ligands/CDK2/4fkj_B_11K_tran_molgrid.pcd', '../ligands/CDK2/3sw4_B_18K_tran_molgrid.pcd', '../ligands/CDK2/3sw7_B_19K_tran_molgrid.pcd', '../ligands/CDK2/4fko_B_20K_tran_molgrid.pcd', '../ligands/CDK2/4fkp_B_LS5_tran_molgrid.pcd', '../ligands/CDK2/4fkq_B_42K_tran_molgrid.pcd', '../ligands/CDK2/4fkr_B_45K_tran_molgrid.pcd', '../ligands/CDK2/4fks_B_46K_tran_molgrid.pcd', '../ligands/CDK2/4fkt_B_48K_tran_molgrid.pcd', '../ligands/CDK2/4fku_D_60K_tran_molgrid.pcd', '../ligands/CDK2/4fkv_B_61K_tran_molgrid.pcd', '../ligands/CDK2/4fkw_B_62K_tran_molgrid.pcd']


In [18]:
# Run the alignment for every CDK2 ligand (original vs. transformed copy) and
# record the two RMSD values returned by align_and_show.
for lig_idx, tran_file in enumerate(files):
    rmsd_before, rmsd_after, _, _ = align_and_show(
        mols[lig_idx], pcds[lig_idx], tmols[lig_idx], tpcds[lig_idx]
    )

    # Ligand label: the "CS<n>" name looked up from the file stem.
    stem = os.path.splitext(os.path.basename(tran_file))[0]

    record = {
        "system": "CDK2",
        "lig": names[stem.replace(f"_tran_{method}", "")],
        "RMSDi": rmsd_before,
        "RMSDf": rmsd_after,
        "idx": lig_idx,
    }
    for column, value in record.items():
        rmsds[column].append(value)

### Failures

In [19]:
# Keep only the CDK2 rows: `idx` restarts at 0 for every system, so it is only
# a valid position into the currently loaded CDK2 lists for these rows.
df = pd.DataFrame(rmsds).loc[lambda frame: frame["system"] == "CDK2"]

In [20]:
# Re-run the alignment for every ligand in `df` whose final RMSD exceeds 0.1,
# save the molecule returned by align_and_show and display the returned view.
failed = df.loc[df["RMSDf"] > 0.1, ["idx", "lig"]]
for _, (lig_idx, lig_name) in failed.iterrows():
    _, _, view, aligned_mol = align_and_show(
        mols[lig_idx], pcds[lig_idx], tmols[lig_idx], tpcds[lig_idx]
    )

    out_file = f"failed-{method}/{lig_name}_alignfail.sdf"
    with Chem.SDWriter(out_file) as writer:
        writer.write(aligned_mol, confId=1)

    display(view)

<py3Dmol.view at 0x7fdf432c89d0>

## Show Results

In [21]:
# Rebuild the table from everything accumulated in `rmsds` (replacing the
# CDK2-only frame from the failure analysis) and display it.
df = pd.DataFrame(rmsds)
df

Unnamed: 0,system,lig,RMSDi,RMSDf,idx
0,BRD4,ligand-1_molgrid,14.533324,0.089485,0
1,BRD4,ligand-2_molgrid,3.482161,0.059879,1
2,BRD4,ligand-3_molgrid,16.627933,0.025493,2
3,BRD4,ligand-4_molgrid,16.260454,0.088715,3
4,BRD4,ligand-5_molgrid,7.331523,0.034777,4
5,BRD4,ligand-6_molgrid,19.503197,0.033594,5
6,BRD4,ligand-7_molgrid,21.968921,0.019354,6
7,BRD4,ligand-8_molgrid,3.888344,0.035938,7
8,BRD4,ligand-9_molgrid,7.502592,0.040493,8
9,BRD4,ligand-10_molgrid,23.961168,0.030577,9


In [22]:
# Show the ligands whose RMSD after alignment is above 2.0.
df.loc[df["RMSDf"] > 2.0]

Unnamed: 0,system,lig,RMSDi,RMSDf,idx
15,CDK2,CS18,52.59247,6.748063,5


In [23]:
# Save the results table to "rmsds-<method>.csv" without the index column,
# formatting floats with five decimals.
df.to_csv(f"rmsds-{method}.csv", index=False, float_format="%.5f")