diff --git a/deeprank/models/pair.py b/deeprank/models/pair.py new file mode 100644 index 00000000..69501def --- /dev/null +++ b/deeprank/models/pair.py @@ -0,0 +1,26 @@ + + +class Pair: + """ A hashable, comparable object for any set of two inputs where order doesn't matter. + + Args: + item1 (object): the pair's first object + item2 (object): the pair's second object + """ + + def __init__(self, item1, item2): + self.item1 = item1 + self.item2 = item2 + + def __hash__(self): + # The hash should be solely based on the two paired items, not on their order. + # So rearrange the two items and turn them into a hashable tuple. + return hash(tuple(sorted([self.item1, self.item2]))) + + def __eq__(self, other): + # Compare the pairs as sets, so the order doesn't matter. + return {self.item1, self.item2} == {other.item1, other.item2} + + def __iter__(self): + # Iterate over the two items in the pair. + return iter([self.item1, self.item2]) diff --git a/deeprank/operate/pdb.py b/deeprank/operate/pdb.py new file mode 100644 index 00000000..6e1e11bf --- /dev/null +++ b/deeprank/operate/pdb.py @@ -0,0 +1,76 @@ +import numpy +import logging + +from deeprank.models.pair import Pair + + +_log = logging.getLogger(__name__) + +def get_squared_distance(position1, position2): + """Get squared euclidean distance, this is computationally cheaper that the euclidean distance + + Args: + position1 (numpy vector): first position + position2 (numpy vector): second position + + Returns (float): the squared distance + """ + + return numpy.sum(numpy.square(position1 - position2)) + + +def get_distance(position1, position2): + """ Get euclidean distance between two positions in space. + + Args: + position1 (numpy vector): first position + position2 (numpy vector): second position + + Returns (float): the distance + """ + + return numpy.sqrt(get_squared_distance(position1, position2)) + + +def get_residue_contact_atom_pairs(pdb2sql, chain_id, residue_number, max_interatomic_distance): + """ Find interatomic contacts around a residue. + + Args: + pdb2sql (pdb2sql object): the pdb structure that we're investigating + chain_id (str): the chain identifier, where the residue is located + residue_number (int): the residue number of interest within the chain + max_interatomic_distance (float): maximum distance between two atoms + + Returns ([Pair(int, int)]): pairs of atom numbers that contact each other + """ + + # Square the max distance, so that we can compare it to the squared euclidean distance between each atom pair. + squared_max_interatomic_distance = numpy.square(max_interatomic_distance) + + # List all the atoms in the selected residue, take the coordinates while we're at it: + residue_atoms = pdb2sql.get('rowID,x,y,z', chainID=chain_id, resSeq=residue_number) + if len(residue_atoms) == 0: + raise ValueError("No residue found in chain {} with number {}".format(chain_id, residue_number)) + + # List all the atoms in the pdb file, take the coordinates while we're at it: + atoms = pdb2sql.get('rowID,x,y,z') + + # Iterate over all the atoms in the pdb, to find neighbours. + contact_atom_pairs = set([]) + for atom_nr, x, y, z in atoms: + + atom_position = numpy.array([x, y, z]) + + # Within the atom iteration, iterate over the atoms in the residue: + for residue_atom_nr, residue_x, residue_y, residue_z in residue_atoms: + + residue_position = numpy.array([residue_x, residue_y, residue_z]) + + # Check that the two atom numbers are not the same and check their distance: + if atom_nr != residue_atom_nr and \ + get_squared_distance(atom_position, residue_position) < squared_max_interatomic_distance: + + contact_atom_pairs.add(Pair(residue_atom_nr, atom_nr)) + + + return contact_atom_pairs diff --git a/test/models/test_pair.py b/test/models/test_pair.py new file mode 100644 index 00000000..fd7e1b25 --- /dev/null +++ b/test/models/test_pair.py @@ -0,0 +1,31 @@ +from nose.tools import eq_, ok_ + +from deeprank.models.pair import Pair + + +def test_order_independency(): + # These should be the same: + pair1 = Pair(1, 2) + pair2 = Pair(2, 1) + + # test comparing: + eq_(pair1, pair2) + + # test hashing: + d = {pair1: 1} + d[pair2] = 2 + eq_(d[pair1], 2) + + +def test_uniqueness(): + # These should be different: + pair1 = Pair(1, 2) + pair2 = Pair(1, 3) + + # test comparing: + ok_(pair1 != pair2) + + # test hashing: + d = {pair1: 1} + d[pair2] = 2 + eq_(d[pair1], 1) diff --git a/test/operate/test_pdb.py b/test/operate/test_pdb.py new file mode 100644 index 00000000..b8c1f755 --- /dev/null +++ b/test/operate/test_pdb.py @@ -0,0 +1,45 @@ +import numpy +import logging +import pkg_resources +import os + +from pdb2sql import pdb2sql +from nose.tools import ok_ + +from deeprank.operate.pdb import get_residue_contact_atom_pairs + + +_log = logging.getLogger(__name__) + +def test_residue_contact_atoms(): + + pdb_path = os.path.join(pkg_resources.resource_filename(__name__, ''), + "../1AK4/native/1AK4.pdb") + + try: + pdb = pdb2sql(pdb_path) + + contact_atom_pairs = get_residue_contact_atom_pairs(pdb, 'C', 145, 8.5) + + # List all the atoms in the pairs that we found: + contact_atoms = set([]) + for atom1, atom2 in contact_atom_pairs: + contact_atoms.add(atom1) + contact_atoms.add(atom2) + + # Ask pdb2sql for the residue identifiers & atom names: + contact_atom_names = [tuple(x) for x in pdb.get("chainID,resSeq,name", rowID=list(contact_atoms))] + finally: + pdb._close() + + # Now, we need to verify that the function "get_residue_contact_atom_pairs" returned the right pairs. + # We do so by selecting one close residue and one distant residue. + + neighbour = ('C', 144, 'CA') # this residue sits right next to the selected residue + distant = ('B', 134, 'OE2') # this residue is very far away + + # Check that the close residue is present in the list and that the distant residue is absent. + + ok_(neighbour in contact_atom_names) + + ok_(distant not in contact_atom_names)