In this notebook, we generate interaction fingerprints (IFPs) for all the docked compounds from the previous step. We first compute the full interaction fingerprint bit vector for each compound; we then filter the compounds and write them to a new file.

In [2]:
!pip install oddt

Collecting oddt
  Using cached oddt-0.7-py2.py3-none-any.whl
Collecting numpydoc
  Using cached numpydoc-1.5.0-py3-none-any.whl (52 kB)
Collecting sphinx>=4.2
  Using cached sphinx-7.0.1-py3-none-any.whl (3.0 MB)
Collecting snowballstemmer>=2.0
  Using cached snowballstemmer-2.2.0-py2.py3-none-any.whl (93 kB)
Collecting docutils<0.21,>=0.18.1
  Using cached docutils-0.20.1-py3-none-any.whl (572 kB)
Collecting sphinxcontrib-applehelp
  Using cached sphinxcontrib_applehelp-1.0.4-py3-none-any.whl (120 kB)
Collecting alabaster<0.8,>=0.7
  Using cached alabaster-0.7.13-py3-none-any.whl (13 kB)
Collecting sphinxcontrib-serializinghtml>=1.1.5
  Using cached sphinxcontrib_serializinghtml-1.1.5-py2.py3-none-any.whl (94 kB)
Collecting sphinxcontrib-qthelp
  Using cached sphinxcontrib_qthelp-1.0.3-py2.py3-none-any.whl (90 kB)
Collecting sphinxcontrib-devhelp
  Using cached sphinxcontrib_devhelp-1.0.2-py2.py3-none-any.whl (84 kB)
Collecting sphinxcontrib-jsmath
  Using cached sphinxcontrib_jsmath-

In [25]:
import oddt
from oddt.fingerprints import (InteractionFingerprint,
                               SimpleInteractionFingerprint,
                               dice)
from scripts import autodock
from rdkit import Chem

In [26]:
# Convert the docking result to SDF ('out.sdf') so it can be read back with
# ODDT in the next cell.
# NOTE(review): the helper is named MolFromPDBQT*Block* but is handed a file
# path here — confirm it accepts a path rather than the file's contents.
mol = autodock.MolFromPDBQTBlock('workdir/docking_results.pdbqt',sanitize=False)
if mol is None:
    # Fail with a readable message instead of an opaque error from SDWriter.
    raise ValueError("Could not parse 'workdir/docking_results.pdbqt'")
mols = [mol]

writer = Chem.rdmolfiles.SDWriter('out.sdf')
try:
    for docked_mol in mols:
        writer.write(docked_mol)
finally:
    # Closing flushes buffered records; without it 'out.sdf' can be empty or
    # truncated when it is read back while `writer` is still alive.
    writer.close()

In [None]:
def split(list_a, chunk_size):
    """Yield consecutive slices of ``list_a`` with at most ``chunk_size`` items.

    Slices are produced in order, starting at index 0; the last one is shorter
    when ``len(list_a)`` is not a multiple of ``chunk_size``.
    """
    starts = range(0, len(list_a), chunk_size)
    yield from (list_a[start:start + chunk_size] for start in starts)

# Residues whose interaction bits are exported, as {residue number: name}.
# Disabled entries from the original selection: 143 (Trp), 147 (Phe).
resnrs = {173: 'Phe', 254: 'Asn'}

# Number of fingerprint entries per slice handed out by split().
chunk_size = 8

# Receptor structure read from PDB and flagged as the protein partner.
# (Disabled alternative: autodock.MolFromPDBQTBlock('workdir/protein.pdbqt',
#  sanitize=False).)
protein = next(oddt.toolkit.readfile('pdb', 'data/A2b_AF_GPCRdb.pdb'))
protein.protein = True

# Load every molecule stored in the SDF written by the previous cell.
# (Disabled alternative input: 'step01_10k_clustered_pocketC.sdf'.)
mols = [entry for entry in oddt.toolkit.readfile('sdf', 'out.sdf')]

# Meaning of the entries inside one per-residue chunk of the fingerprint:
#    - (Column 0) hydrophobic contacts
#    - (Column 1) aromatic face to face
#    - (Column 2) aromatic edge to face
#    - (Column 3) hydrogen bond (protein as hydrogen bond donor)
#    - (Column 4) hydrogen bond (protein as hydrogen bond acceptor)
#    - (Column 5) salt bridges (protein positively charged)
#    - (Column 6) salt bridges (protein negatively charged)
#    - (Column 7) salt bridges (ionic bond with metal ion)
print("Generating IFPs")

# Write one SDF record per molecule, annotated with the hydrogen-bond donor and
# acceptor entries of every residue listed in `resnrs`.
with open('workdir/docked_mols_IFP.sdf', 'w') as outfile:
    for mol in mols:
        # NOTE(review): properties already stored on `mol` are not appended
        # explicitly here — confirm whether write(format='sdf') includes them.
        molstring = (oddt.toolkits.rdk.Molecule(mol).write(format='sdf'))

        IFP = oddt.fingerprints.InteractionFingerprint(mol, protein, strict=True)

        # Chunk the flat fingerprint once per molecule; it is the same for
        # every residue looked up below.
        residue_chunks = list(split(IFP, chunk_size))

        for resnr, resname in resnrs.items():
            # NOTE(review): indexing with resnr-1 assumes one chunk per residue
            # with residue number N stored at position N-1 (numbering starts at
            # 1, no gaps) — TODO confirm against the protein file.
            fp = residue_chunks[resnr-1]

            # format residue properties
            molstring = molstring + '\n> <{}{}_donor>\n{}\n'.format(resname,resnr,fp[3])
            molstring = molstring + '\n> <{}{}_acceptor>\n{}\n'.format(resname,resnr,fp[4])

        molstring = molstring + '\n$$$$\n'
        outfile.write(molstring)