In [8]:
import os
import csv
import numpy as np
import pandas as pd
from rdkit import Chem
from rdkit.Chem import AllChem
from networkx.algorithms import isomorphism
import networkx as nx

# change your path here
root_dir = './dynamicbind'
output_csv = './dynamicbind/dynamicbind_rmsd.csv'

def read_sdf(file_path):
    mol = Chem.SDMolSupplier(file_path)[0]
    return mol

def mol_to_graph(mol):
    G = nx.Graph()
    
    for atom in mol.GetAtoms():
        G.add_node(atom.GetIdx(), atom_type=atom.GetSymbol())
    
    for bond in mol.GetBonds():
        G.add_edge(bond.GetBeginAtomIdx(), bond.GetEndAtomIdx())
    
    return G

def calculate_rmsd(mol1, mol2, atom_map):
    coords1 = mol1.GetConformer().GetPositions()
    coords2 = mol2.GetConformer().GetPositions()
    
    mapped_coords1 = np.array([coords1[i] for i in atom_map.keys()])
    mapped_coords2 = np.array([coords2[i] for i in atom_map.values()])
    
    diff = mapped_coords1 - mapped_coords2
    #print(coords1, coords2), print(mapped_coords1, mapped_coords2), print(diff)
    return np.sqrt(np.mean(np.sum(diff**2, axis=1)))

def calculate_rmsd_without_alignment(sdf_file1, sdf_file2):
    mol1 = read_sdf(sdf_file1)
    mol2 = read_sdf(sdf_file2)
    
    graph1 = mol_to_graph(mol1)
    if mol2 is None:
        return None
    else:
        graph2 = mol_to_graph(mol2)
    
    nm = isomorphism.GraphMatcher(graph1, graph2, 
                                  node_match=lambda n1, n2: n1['atom_type'] == n2['atom_type'])
    
    if nm.is_isomorphic():
        atom_map = nm.mapping
        rmsd = calculate_rmsd(mol1, mol2, atom_map)
        return rmsd
    else:
        return None

def process_sdfs(root_dir,output_csv):
    if os.path.exists(output_csv):
        os.remove(output_csv)
    # Get all the subdir name in the rootdir
    subdir_list = os.listdir(root_dir)
    with open(output_csv, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['pdb_id', 'rmsd'])
    for pdb_id in subdir_list:
        # Get the ligand.sdf file path
        sdf_file1 = f'{root_dir}/{pdb_id}/{pdb_id}_docked_ligand.sdf'
        sdf_file2 = f'/mnt/data/posebusters/posebusters_benchmark_set/{pdb_id}/{pdb_id}_ligand.sdf'
        rmsd = calculate_rmsd_without_alignment(sdf_file1, sdf_file2)
        if rmsd is not None:
            print(f"{pdb_id} RMSD without alignment: {rmsd:.4f}")
        else:
            print(f"{pdb_id} The molecules are not isomorphic.")   
        with open(output_csv, 'a', newline='') as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow([pdb_id, rmsd])

process_sdfs(root_dir,output_csv)

8GFD_ZHR RMSD without alignment: 3.2615
7SIU_9ID RMSD without alignment: 47.2155
7CUO_PHB RMSD without alignment: 4.6656
7NR6_UO8 RMSD without alignment: 2.6685
7UAW_MF6 RMSD without alignment: 22.2183
7JG0_GAR RMSD without alignment: 9.9615
8H0M_2EH RMSD without alignment: 5.2954
7KLX_WOV RMSD without alignment: 16.6474
7KQU_YOF RMSD without alignment: 29.2009
7Z7F_IF3 RMSD without alignment: 3.6126
8EAD_UY0 RMSD without alignment: 1.5367
7R59_I5F RMSD without alignment: 1.5164
7ZXZ_K9R RMSD without alignment: 18.7850
8BOM_QU6 RMSD without alignment: 3.7167
7JR8_VH7 RMSD without alignment: 25.9528
7PGX_FMN RMSD without alignment: 4.7687
8CI0_8EL RMSD without alignment: 97.3319
8BN6_R53 RMSD without alignment: 1.3924
7R9N_F97 RMSD without alignment: 0.9745
7WCF_ACP RMSD without alignment: 2.4535
7ODX_DGP RMSD without alignment: 2.3278
7EN7_J79 RMSD without alignment: 4.5594
7UMV_NUU RMSD without alignment: 16.3391
7XI7_4RI RMSD without alignment: 1.3583
7RUI_7QZ RMSD without alignment:

In [9]:
data = pd.read_csv(output_csv)

# rmsd < 2
valid_rmsd = data[data['rmsd'] != -1]
rmsd_less_than_2 = valid_rmsd[valid_rmsd['rmsd'] < 2]
rmsd_less_than_2_ratio = len(rmsd_less_than_2) / len(data)

# success rate
total_entries = len(data)
successful_entries = len(rmsd_less_than_2)
success_rate = successful_entries / total_entries

print(f"rmsd < 2: {rmsd_less_than_2_ratio:.2%}")
print(f"success rate: {success_rate:.2%}")

rmsd < 2: 15.70%
success rate: 15.70%
