Merge df2756e into 0f6bb59

DeepRank · Apr 6, 2021 · a0e3c60 · a0e3c60
2 parents 0f6bb59 + df2756e
commit a0e3c60
Show file tree

Hide file tree

Showing 6 changed files with 287 additions and 0 deletions.
diff --git a/deeprank/models/atom.py b/deeprank/models/atom.py
@@ -0,0 +1,22 @@
+
+
+class Atom:
+    def __init__(self, id_, position, chain_id, name, residue=None):
+        self.id = id_
+        self.name = name
+        self.chain_id = chain_id
+        self.position = position
+        self.residue = residue
+
+    def __hash__(self):
+        return hash(self.id)
+
+    def __eq__(self, other):
+        return self.id == other.id
+
+    def __lt__(self, other):
+        return self.id < other.id
+
+    def __repr__(self):
+        return "Atom {} ({}) from {} {} at {}".format(self.id, self.chain_id, self.residue, self.name, self.position)
+
diff --git a/deeprank/models/pair.py b/deeprank/models/pair.py
@@ -0,0 +1,26 @@
+
+
+class Pair:
+    """ A hashable, comparable object for any set of two inputs where order doesn't matter.
+
+        Args:
+            item1 (object): the pair's first object
+            item2 (object): the pair's second object
+    """
+
+    def __init__(self, item1, item2):
+        self.item1 = item1
+        self.item2 = item2
+
+    def __hash__(self):
+        # The hash should be solely based on the two paired items, not on their order.
+        # So rearrange the two items and turn them into a hashable tuple.
+        return hash(tuple(sorted([self.item1, self.item2])))
+
+    def __eq__(self, other):
+        # Compare the pairs as sets, so the order doesn't matter.
+        return {self.item1, self.item2} == {other.item1, other.item2}
+
+    def __iter__(self):
+        # Iterate over the two items in the pair.
+        return iter([self.item1, self.item2])
diff --git a/deeprank/models/residue.py b/deeprank/models/residue.py
@@ -0,0 +1,24 @@
+
+
+class Residue:
+    def __init__(self, number, name):
+        self.number = number
+        self.name = name
+        self.atoms = []
+
+    @property
+    def chain_id(self):
+        chain_ids = set([atom.chain_id for atom in self.atoms])
+        if len(chain_ids) > 1:
+            raise ValueError("residue {} {} contains atoms of different chains: {}".format(self.name, self.number, self.atoms))
+
+        return list(chain_ids)[0]
+
+    def __hash__(self):
+        return hash((self.chain_id, self.number))
+
+    def __eq__(self, other):
+        return self.chain_id == other.chain_id and self.number == other.number
+
+    def __repr__(self):
+        return "Residue {} {}".format(self.name, self.number)
diff --git a/deeprank/operate/pdb.py b/deeprank/operate/pdb.py
@@ -0,0 +1,98 @@
+import numpy
+import logging
+
+from deeprank.models.pair import Pair
+from deeprank.models.atom import Atom
+from deeprank.models.residue import Residue
+
+
+_log = logging.getLogger(__name__)
+
+
+def get_distance(position1, position2):
+    """ Get euclidean distance between two positions in space.
+
+        Args:
+            position1 (numpy vector): first position
+            position2 (numpy vector): second position
+
+        Returns (float): the distance
+    """
+
+    return numpy.sqrt(numpy.sum(numpy.square(position1 - position2)))
+
+
+def get_atoms(pdb2sql):
+    """ Builds a list of atom objects, according to the contents of the pdb file.
+
+        Args:
+            pdb2sql (pdb2sql object): the pdb structure that we're investigating
+
+        Returns ([Atom]): all the atoms in the pdb file.
+    """
+
+    # This is a working dictionary of residues, identified by their chains and numbers.
+    residues = {}
+
+    # This is the list of atom objects, that will be returned.
+    atoms = []
+
+    # Iterate over the atom output from pdb2sql
+    for x, y, z, atom_number, atom_name, chain_id, residue_number, residue_name in \
+            pdb2sql.get("x,y,z,rowID,name,chainID,resSeq,resName"):
+
+        # Make sure that the residue is in the working directory:
+        residue_id = (chain_id, residue_number)
+        if residue_id not in residues:
+            residues[residue_id] = Residue(residue_number, residue_name)
+
+        # Turn the x,y,z into a vector:
+        atom_position = numpy.array([x, y, z])
+
+        # Create the atom object and link it to the residue:
+        atom = Atom(atom_number, atom_position, chain_id, atom_name, residues[residue_id])
+        residues[residue_id].atoms.append(atom)
+        atoms.append(atom)
+
+    return atoms
+
+
+def get_residue_contact_atom_pairs(pdb2sql, chain_id, residue_number, max_interatomic_distance):
+    """ Find interatomic contacts around a residue.
+
+        Args:
+            pdb2sql (pdb2sql object): the pdb structure that we're investigating
+            chain_id (str): the chain identifier, where the residue is located
+            residue_number (int): the residue number of interest within the chain
+            max_interatomic_distance (float): maximum distance between two atoms
+
+        Returns ([Pair(int, int)]): pairs of atom numbers that contact each other
+    """
+
+    # get all the atoms in the pdb file:
+    atoms = get_atoms(pdb2sql)
+
+    # List all the atoms in the selected residue, take the coordinates while we're at it:
+    residue_atoms = [atom for atom in atoms if atom.chain_id == chain_id and
+                                               atom.residue.number == residue_number]
+    if len(residue_atoms) == 0:
+        raise ValueError("No atoms found in chain {} with residue number {}".format(chain_id, residue_number))
+
+    # Iterate over all the atoms in the pdb, to find neighbours.
+    contact_atom_pairs = set([])
+    for atom in atoms:
+
+        # Check that the atom is not one of the residue's own atoms:
+        if atom.chain_id == chain_id and atom.residue.number == residue_number:
+            continue
+
+        # Within the atom iteration, iterate over the atoms in the residue:
+        for residue_atom in residue_atoms:
+
+            # Check that the atom is close enough:
+            if get_distance(atom.position, residue_atom.position) < max_interatomic_distance:
+
+                contact_atom_pairs.add(Pair(residue_atom, atom))
+
+
+    return contact_atom_pairs
diff --git a/test/models/test_pair.py b/test/models/test_pair.py
@@ -0,0 +1,31 @@
+from nose.tools import eq_, ok_
+
+from deeprank.models.pair import Pair
+
+
+def test_order_independency():
+    # These should be the same:
+    pair1 = Pair(1, 2)
+    pair2 = Pair(2, 1)
+
+    # test comparing:
+    eq_(pair1, pair2)
+
+    # test hashing:
+    d = {pair1: 1}
+    d[pair2] = 2
+    eq_(d[pair1], 2)
+
+
+def test_uniqueness():
+    # These should be different:
+    pair1 = Pair(1, 2)
+    pair2 = Pair(1, 3)
+
+    # test comparing:
+    ok_(pair1 != pair2)
+
+    # test hashing:
+    d = {pair1: 1}
+    d[pair2] = 2
+    eq_(d[pair1], 1)
diff --git a/test/operate/test_pdb.py b/test/operate/test_pdb.py
@@ -0,0 +1,86 @@
+import numpy
+import logging
+import pkg_resources
+import os
+
+from pdb2sql import pdb2sql
+from nose.tools import ok_
+
+from deeprank.operate.pdb import get_residue_contact_atom_pairs, get_atoms
+
+
+_log = logging.getLogger(__name__)
+
+
+def test_get_atoms():
+    pdb_path = os.path.join(pkg_resources.resource_filename(__name__, ''),
+                            "../1AK4/native/1AK4.pdb")
+
+    try:
+        pdb = pdb2sql(pdb_path)
+
+        atoms = get_atoms(pdb)
+
+        ok_(len(atoms) > 0)
+    finally:
+        pdb._close()
+
+
+def _find_atom(atoms, chain_id, residue_number, name):
+    matching_atoms = [atom for atom in atoms if atom.chain_id == chain_id and
+                                                atom.name == name and atom.residue.number == residue_number]
+
+    assert len(matching_atoms) == 1, "Expected exacly one matching atom, got {}".format(len(matching_atoms))
+
+    return matching_atoms[0]
+
+
+def _find_residue(atoms, chain_id, residue_number):
+    matching_atoms = [atom for atom in atoms if atom.chain_id == chain_id and
+                                                atom.residue.number == residue_number]
+
+    assert len(matching_atoms) > 0, "Expected at least one matching atom, got zero"
+
+    return matching_atoms[0].residue
+
+
+def test_residue_contact_atoms():
+
+    pdb_path = os.path.join(pkg_resources.resource_filename(__name__, ''),
+                            "../1AK4/native/1AK4.pdb")
+    chain_id = 'D'
+    residue_number = 145
+
+    try:
+        pdb = pdb2sql(pdb_path)
+
+        atoms = get_atoms(pdb)
+        query_residue = _find_residue(atoms, chain_id, residue_number)
+
+        contact_atom_pairs = get_residue_contact_atom_pairs(pdb, chain_id, residue_number, 8.5)
+
+        # List all the atoms in the pairs that we found:
+        contact_atoms = set([])
+        for atom1, atom2 in contact_atom_pairs:
+            contact_atoms.add(atom1)
+            contact_atoms.add(atom2)
+
+            # Be sure that this atom pair does not pair two atoms of the same residue:
+            ok_(atom1.residue != atom2.residue)
+
+            # Be sure that one of the atoms in the pair is from the query residue:
+            ok_(atom1.residue == query_residue or atom2.residue == query_residue)
+    finally:
+        pdb._close()
+
+    # Now, we need to verify that the function "get_residue_contact_atom_pairs" returned the right pairs.
+    # We do so by selecting one close residue and one distant residue.
+
+    neighbour = _find_atom(atoms, 'D', 144, 'CA')  # this residue sits right next to the selected residue
+    distant = _find_atom(atoms, 'C', 134, 'OE2')  # this residue is very far away
+
+    # Check that the close residue is present in the list and that the distant residue is absent.
+    # Also, the function should not pair a residue with itself.
+
+    ok_(neighbour in contact_atoms)
+    ok_(distant not in contact_atoms)