In [4]:
# import what is needed
from openeye.oechem import *
from __future__ import print_function
from blues import ncmc
from blues import utils
import parmed
from simtk import unit, openmm
from datetime import datetime
from optparse import OptionParser
import os

In [20]:
def pDB2OEMol(pdbfile):
    '''This function takes in a pdbfile as a string (e.g. 'protein.pdb') and reads it into and OEGraphMol'''
    
    # check if file exists
    if os.path.isfile(pdbfile):
        # read file into an input stream
        ifs = oemolistream(pdbfile)
        # set the format of the input stream to pdb (other wise SMI default)
        ifs.SetFormat(OEFormat_PDB)
        # create OEMol destination
        pdb_OEMol = OEGraphMol()
        # assign input stream to OEMol
        OEReadMolecule(ifs, pdb_OEMol)
        return pdb_OEMol
    else:
        print('PDB filename not found.')

In [41]:
def getBackboneAtoms(molecule):
    '''This function takes a OEGraphMol PDB structure and returns a list of backbone atoms'''
    backbone_atoms = []
    # Call this function to find atoms and bonds
    OEFindRingAtomsAndBonds(molecule)
    #OEPerceiveResidues(pdb_OEMol)
    if not OEHasResidues(molecule):
            OEPerceiveResidues(molecule, OEPreserveResInfo_All)
    aiter = molecule.GetAtoms(OEIsBackboneAtom())
    for atom in aiter:
        bb_atom_idx = atom.GetIdx()
        backbone_atoms.append(bb_atom_idx)
    return backbone_atoms


In [35]:
def getTargetAtoms(molecule, residue_list):
    '''This function takes a OEGraphMol PDB structure and a list of residue numbers and 
        generates a dictionary containing all the non-backbone, heavy atom locations and indicies for those residues.
        Note: The atom indicies start at 0 and are thus -1 from the PDB file indicies'''

    # Call this function to find atoms and bonds
    OEFindRingAtomsAndBonds(molecule)

    # create and clear dictionary to store atoms that make up residue list
    qry_atoms = {}
    qry_atoms.clear()
    
    # call backbone function and create backbone atom list
    backbone = getBackboneAtoms(molecule)
    
    print('Searching residues for heavy atoms...')
    # loop through all the atoms in the PDB OEGraphMol structure
    for atom in molecule.GetAtoms():
        # check if the atom is a heavy atom
        if atom.GetAtomicNum() > 1 and atom.GetIdx() not in backbone:
            # if heavy, find what residue it is associated with
            myres = OEAtomGetResidue(atom)
            # check if the residue number is amongst the list of residues
            if myres.GetResidueNumber() in residue_list:
                # store the atom location in a query atom dict keyed by its atom index
                qry_atoms.update({atom : atom.GetIdx()})
                print('Found',atom) 
    print('\n')

    #return dictionary of residue atoms and indicies
    return qry_atoms

In [36]:
def findHeavyRotBonds(pdb_OEMol, qry_atoms):
    '''This function takes in an OEGraphMol PDB structure as well as a dictionary of atom locations (keys) 
        and atom indicies.  It loops over the query atoms and identifies the bonds associated with each atom.
        It stores and returns the bond indicies (keys) and the two atom indicies for each bond in a dictionary
        **Note: atom indicies start at 0, so are offset by 1 compared to pdb)'''
    
    # Call this function to find atoms and bonds
    OEFindRingAtomsAndBonds(pdb_OEMol)
    
    # create and clear dictionary to store bond and atom indicies that are rotatable + heavy
    rot_atoms = {}
    rot_atoms.clear()
    
    for atom in qry_atoms:
        for bond in atom.GetBonds():
            # retrieve the begnning and ending atoms
            begatom = bond.GetBgn()
            endatom = bond.GetEnd()
            # if begnnning and ending atoms are not Hydrogen, and the bond is rotatable
            if endatom.GetAtomicNum() >1 and begatom.GetAtomicNum() >1 and OEIsRotor:
                # if the bond has not been added to dictionary already..
                # (as would happen if one of the atom pairs was previously looped over)
                if bond.GetIdx() not in rot_atoms:
                    # print the bond index
                    print('Bond number',bond, 'is rotatable and contains only heavy atoms')
                    # store bond index number (key) and atom indicies in dictionary if not already there
                    rot_atoms.update({bond.GetIdx() : {'AtomIdx_1' : bond.GetBgnIdx(), 'AtomIdx_2': bond.GetEndIdx()}})


    # Return dictionary with bond atom indicies keyed by bond index
    return rot_atoms


In [37]:
def getRotBonds(pdbfile,residue_list):
    '''This function takes in a PDB filename (as a string) and list of residue numbers.  It returns
        a dictionary of rotatable bonds (containing only heavy atoms), that are keyed by bond index 
        and containing the associated atom indicies
        **Note: The atom indicies start at 0, and are offset by -1 from the PDB file indicies'''
    # read .pdb file into OEGraphMol
    structure = pDB2OEMol(pdbfile)
    print('\nPDB file opened into OEGraphMol\n')
    # Generate dictionary containing locations and indicies of heavy residue atoms
    qry_atoms = getTargetAtoms(structure, residue_list)
    print('Dictionary of query atoms generated from residue list\n')
    # Identify bonds containing query atoms and return dictionary of indicies
    rot_atoms = findHeavyRotBonds(structure,qry_atoms)
    print('\nRotable bond and atom index dictionary generated with format: {Bond Index: {Atom1Index:, Atom2Index:}}')
    print('\n')
    print(rot_atoms)

In [42]:
#Test GetRotBonds function using .pdb file and list of residues

# Load parmed files and output as pdb
prmtop = utils.get_data_filename('blues', 'tests/data/eqToluene.prmtop')
inpcrd = utils.get_data_filename('blues', 'tests/data/eqToluene.inpcrd')
# Load these into a ParmEd Structure called `struct`
struct = parmed.load_file(prmtop, xyz=inpcrd)
prot = struct.save('protein.pdb', overwrite = True)


# define a residue list
my_residues = [111]

# call function
my_rotatable_atoms = getRotBonds('protein.pdb', my_residues)


PDB file opened into OEGraphMol

Searching residues for heavy atoms...
Found 1737 C
Found 1739 C
Found 1743 C


Dictionary of query atoms generated from residue list

Bond number 1746 (1737C-1735C) is rotatable and contains only heavy atoms
Bond number 1748 (1739C-1737C) is rotatable and contains only heavy atoms
Bond number 1752 (1743C-1737C) is rotatable and contains only heavy atoms

Rotable bond and atom index dictionary generated with format: {Bond Index: {Atom1Index:, Atom2Index:}}


{1752: {'AtomIdx_1': 1743, 'AtomIdx_2': 1737}, 1746: {'AtomIdx_1': 1737, 'AtomIdx_2': 1735}, 1748: {'AtomIdx_1': 1739, 'AtomIdx_2': 1737}}


In [34]:
pdb_OEMol = pDB2OEMol('protein.pdb')
y = getBackboneAtoms(pdb_OEMol)

[0, 4, 17, 18, 19, 21, 31, 32, 33, 35, 50, 51, 52, 54, 70, 71, 72, 74, 85, 86, 87, 89, 102, 103, 104, 106, 121, 122, 123, 125, 145, 146, 147, 149, 164, 165, 166, 168, 176, 177, 178, 180, 191, 192, 193, 195, 198, 199, 200, 202, 217, 218, 219, 221, 241, 242, 243, 245, 260, 261, 262, 264, 282, 283, 284, 286, 301, 302, 303, 305, 322, 323, 324, 326, 344, 345, 346, 348, 356, 357, 358, 360, 370, 371, 372, 374, 385, 386, 387, 389, 392, 393, 394, 396, 413, 414, 415, 417, 434, 435, 436, 438, 448, 449, 450, 452, 467, 468, 469, 471, 474, 475, 476, 478, 493, 494, 495, 497, 500, 501, 502, 504, 517, 518, 519, 521, 536, 537, 538, 540, 555, 556, 557, 559, 569, 570, 571, 573, 591, 592, 593, 595, 602, 603, 604, 614, 616, 617, 618, 620, 627, 628, 629, 631, 646, 647, 648, 650, 660, 661, 662, 664, 670, 671, 672, 674, 680, 681, 682, 684, 702, 703, 704, 706, 713, 714, 715, 717, 728, 729, 730, 732, 747, 748, 749, 751, 759, 760, 761, 763, 781, 782, 783, 785, 791, 792, 793, 795, 810, 811, 812, 814, 817, 818, 819