From 848a39440294683af42f0ba0ac8de6ab13881580 Mon Sep 17 00:00:00 2001
From: Coos Baakman <coos.baakman@radboudumc.nl>
Date: Thu, 1 Apr 2021 12:11:50 +0000
Subject: [PATCH 1/6] add an operating function to find the surrounding atoms
 around a residue

---
 deeprank/models/pair.py  | 17 +++++++++++++++++
 deeprank/operate/pdb.py  | 41 ++++++++++++++++++++++++++++++++++++++++
 test/operate/test_pdb.py | 34 +++++++++++++++++++++++++++++++++
 3 files changed, 92 insertions(+)
 create mode 100644 deeprank/models/pair.py
 create mode 100644 deeprank/operate/pdb.py
 create mode 100644 test/operate/test_pdb.py

diff --git a/deeprank/models/pair.py b/deeprank/models/pair.py
new file mode 100644
index 00000000..71cd941b
--- /dev/null
+++ b/deeprank/models/pair.py
@@ -0,0 +1,17 @@
+
+
+class Pair:
+    "a hashable, comparable object for any set of two inputs where order doesn't matter"
+
+    def __init__(self, item1, item2):
+        self.item1 = item1
+        self.item2 = item2
+
+    def __hash__(self):
+        return hash((self.item1, self.item2))
+
+    def __eq__(self, other):
+        return {self.item1, self.item2} == {other.item1, other.item2}
+
+    def __iter__(self):
+        return iter([self.item1, self.item2])
diff --git a/deeprank/operate/pdb.py b/deeprank/operate/pdb.py
new file mode 100644
index 00000000..9c9944a2
--- /dev/null
+++ b/deeprank/operate/pdb.py
@@ -0,0 +1,41 @@
+import numpy
+import logging
+
+from deeprank.models.pair import Pair
+
+
+_log = logging.getLogger(__name__)
+
+def get_squared_distance(position1, position2):
+    return numpy.sum(numpy.square(position1 - position2))
+
+
+def get_distance(position1, position2):
+    return numpy.sqrt(get_squared_distance(position1, position2))
+
+
+def get_residue_contact_atom_pairs(pdb2sql, chain_id, residue_number, max_interatomic_distance):
+    squared_max_interatomic_distance = numpy.square(max_interatomic_distance)
+
+    residue_atoms = pdb2sql.get('rowID,x,y,z', chainID=chain_id, resSeq=residue_number)
+    if len(residue_atoms) == 0:
+        raise ValueError("No residue found in chain {} with number {}".format(chain_id, residue_number))
+
+    atoms = pdb2sql.get('rowID,x,y,z')
+
+    contact_atom_pairs = set([])
+
+    for atom_nr, x, y, z in atoms:
+
+        atom_position = numpy.array([x, y, z])
+
+        for residue_atom_nr, residue_x, residue_y, residue_z in residue_atoms:
+
+            residue_position = numpy.array([residue_x, residue_y, residue_z])
+
+            if get_squared_distance(atom_position, residue_position) < squared_max_interatomic_distance:
+
+                contact_atom_pairs.add(Pair(residue_atom_nr, atom_nr))
+
+
+    return contact_atom_pairs
diff --git a/test/operate/test_pdb.py b/test/operate/test_pdb.py
new file mode 100644
index 00000000..e66324cd
--- /dev/null
+++ b/test/operate/test_pdb.py
@@ -0,0 +1,34 @@
+import numpy
+import logging
+
+from pdb2sql import pdb2sql
+from nose.tools import ok_
+
+from deeprank.operate.pdb import get_residue_contact_atom_pairs
+
+
+_log = logging.getLogger(__name__)
+
+def test_residue_contact_atoms():
+    pdb_path = "test/1AK4/native/1AK4.pdb"
+
+    try:
+        pdb = pdb2sql(pdb_path)
+
+        contact_atom_pairs = get_residue_contact_atom_pairs(pdb, 'C', 145, 8.5)
+
+        contact_atoms = set([])
+        for atom1, atom2 in contact_atom_pairs:
+            contact_atoms.add(atom1)
+            contact_atoms.add(atom2)
+
+        contact_atom_names = [tuple(x) for x in pdb.get("chainID,resSeq,name", rowID=list(contact_atoms))]
+    finally:
+        pdb._close()
+
+    neighbour = ('C', 144, 'CA')
+    distant = ('B', 134, 'OE2')
+
+    ok_(neighbour in contact_atom_names)
+
+    ok_(distant not in contact_atom_names)

From 63f53a6e1ecf87d82986f3f83d66f3ac0e63be42 Mon Sep 17 00:00:00 2001
From: Coos Baakman <coos.baakman@radboudumc.nl>
Date: Thu, 1 Apr 2021 12:28:00 +0000
Subject: [PATCH 2/6] pathname fix to make the test work from anywhere

---
 test/operate/test_pdb.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/test/operate/test_pdb.py b/test/operate/test_pdb.py
index e66324cd..90a3fc64 100644
--- a/test/operate/test_pdb.py
+++ b/test/operate/test_pdb.py
@@ -1,5 +1,7 @@
 import numpy
 import logging
+import pkg_resources
+import os
 
 from pdb2sql import pdb2sql
 from nose.tools import ok_
@@ -10,7 +12,9 @@
 _log = logging.getLogger(__name__)
 
 def test_residue_contact_atoms():
-    pdb_path = "test/1AK4/native/1AK4.pdb"
+
+    pdb_path = os.path.join(pkg_resources.resource_filename(__name__, ''),
+                            "../1AK4/native/1AK4.pdb")
 
     try:
         pdb = pdb2sql(pdb_path)

From 31907e31a690918c9870728637ebad886882d6a3 Mon Sep 17 00:00:00 2001
From: Coos Baakman <coos.baakman@radboudumc.nl>
Date: Thu, 1 Apr 2021 13:40:09 +0000
Subject: [PATCH 3/6] explain the test with comments

---
 test/operate/test_pdb.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/test/operate/test_pdb.py b/test/operate/test_pdb.py
index 90a3fc64..b8c1f755 100644
--- a/test/operate/test_pdb.py
+++ b/test/operate/test_pdb.py
@@ -21,17 +21,24 @@ def test_residue_contact_atoms():
 
         contact_atom_pairs = get_residue_contact_atom_pairs(pdb, 'C', 145, 8.5)
 
+        # List all the atoms in the pairs that we found:
         contact_atoms = set([])
         for atom1, atom2 in contact_atom_pairs:
             contact_atoms.add(atom1)
             contact_atoms.add(atom2)
 
+        # Ask pdb2sql for the residue identifiers & atom names:
         contact_atom_names = [tuple(x) for x in pdb.get("chainID,resSeq,name", rowID=list(contact_atoms))]
     finally:
         pdb._close()
 
-    neighbour = ('C', 144, 'CA')
-    distant = ('B', 134, 'OE2')
+    # Now, we need to verify that the function "get_residue_contact_atom_pairs" returned the right pairs.
+    # We do so by selecting one close residue and one distant residue.
+
+    neighbour = ('C', 144, 'CA')  # this residue sits right next to the selected residue
+    distant = ('B', 134, 'OE2')  # this residue is very far away
+
+    # Check that the close residue is present in the list and that the distant residue is absent.
 
     ok_(neighbour in contact_atom_names)
 

From f330c0599edd0648fcdd09bff8ab2923a272a897 Mon Sep 17 00:00:00 2001
From: Coos Baakman <coos.baakman@radboudumc.nl>
Date: Thu, 1 Apr 2021 13:52:31 +0000
Subject: [PATCH 4/6] added some comments and do an extra check to prevent
 pairing an atom with itself

---
 deeprank/operate/pdb.py | 39 +++++++++++++++++++++++++++++++++++++--
 1 file changed, 37 insertions(+), 2 deletions(-)

diff --git a/deeprank/operate/pdb.py b/deeprank/operate/pdb.py
index 9c9944a2..6e1e11bf 100644
--- a/deeprank/operate/pdb.py
+++ b/deeprank/operate/pdb.py
@@ -7,33 +7,68 @@
 _log = logging.getLogger(__name__)
 
 def get_squared_distance(position1, position2):
+    """Get squared euclidean distance, this is computationally cheaper that the euclidean distance
+
+        Args:
+            position1 (numpy vector): first position
+            position2 (numpy vector): second position
+
+        Returns (float): the squared distance
+    """
+
     return numpy.sum(numpy.square(position1 - position2))
 
 
 def get_distance(position1, position2):
+    """ Get euclidean distance between two positions in space.
+
+        Args:
+            position1 (numpy vector): first position
+            position2 (numpy vector): second position
+
+        Returns (float): the distance
+    """
+
     return numpy.sqrt(get_squared_distance(position1, position2))
 
 
 def get_residue_contact_atom_pairs(pdb2sql, chain_id, residue_number, max_interatomic_distance):
+    """ Find interatomic contacts around a residue.
+
+        Args:
+            pdb2sql (pdb2sql object): the pdb structure that we're investigating
+            chain_id (str): the chain identifier, where the residue is located
+            residue_number (int): the residue number of interest within the chain
+            max_interatomic_distance (float): maximum distance between two atoms
+
+        Returns ([Pair(int, int)]): pairs of atom numbers that contact each other
+    """
+
+    # Square the max distance, so that we can compare it to the squared euclidean distance between each atom pair.
     squared_max_interatomic_distance = numpy.square(max_interatomic_distance)
 
+    # List all the atoms in the selected residue, take the coordinates while we're at it:
     residue_atoms = pdb2sql.get('rowID,x,y,z', chainID=chain_id, resSeq=residue_number)
     if len(residue_atoms) == 0:
         raise ValueError("No residue found in chain {} with number {}".format(chain_id, residue_number))
 
+    # List all the atoms in the pdb file, take the coordinates while we're at it:
     atoms = pdb2sql.get('rowID,x,y,z')
 
+    # Iterate over all the atoms in the pdb, to find neighbours.
     contact_atom_pairs = set([])
-
     for atom_nr, x, y, z in atoms:
 
         atom_position = numpy.array([x, y, z])
 
+        # Within the atom iteration, iterate over the atoms in the residue:
         for residue_atom_nr, residue_x, residue_y, residue_z in residue_atoms:
 
             residue_position = numpy.array([residue_x, residue_y, residue_z])
 
-            if get_squared_distance(atom_position, residue_position) < squared_max_interatomic_distance:
+            # Check that the two atom numbers are not the same and check their distance:
+            if atom_nr != residue_atom_nr and \
+                    get_squared_distance(atom_position, residue_position) < squared_max_interatomic_distance:
 
                 contact_atom_pairs.add(Pair(residue_atom_nr, atom_nr))
 

From 8d9cb1b8020e9c880a9ad369dd9eca42c3145cdf Mon Sep 17 00:00:00 2001
From: Coos Baakman <coos.baakman@radboudumc.nl>
Date: Thu, 1 Apr 2021 14:00:42 +0000
Subject: [PATCH 5/6] add comments to the pair class and test that it is truly
 order independent

---
 deeprank/models/pair.py  | 13 +++++++++++--
 test/models/test_pair.py | 15 +++++++++++++++
 2 files changed, 26 insertions(+), 2 deletions(-)
 create mode 100644 test/models/test_pair.py

diff --git a/deeprank/models/pair.py b/deeprank/models/pair.py
index 71cd941b..69501def 100644
--- a/deeprank/models/pair.py
+++ b/deeprank/models/pair.py
@@ -1,17 +1,26 @@
 
 
 class Pair:
-    "a hashable, comparable object for any set of two inputs where order doesn't matter"
+    """ A hashable, comparable object for any set of two inputs where order doesn't matter.
+
+        Args:
+            item1 (object): the pair's first object
+            item2 (object): the pair's second object
+    """
 
     def __init__(self, item1, item2):
         self.item1 = item1
         self.item2 = item2
 
     def __hash__(self):
-        return hash((self.item1, self.item2))
+        # The hash should be solely based on the two paired items, not on their order.
+        # So rearrange the two items and turn them into a hashable tuple.
+        return hash(tuple(sorted([self.item1, self.item2])))
 
     def __eq__(self, other):
+        # Compare the pairs as sets, so the order doesn't matter.
         return {self.item1, self.item2} == {other.item1, other.item2}
 
     def __iter__(self):
+        # Iterate over the two items in the pair.
         return iter([self.item1, self.item2])
diff --git a/test/models/test_pair.py b/test/models/test_pair.py
new file mode 100644
index 00000000..b7d02036
--- /dev/null
+++ b/test/models/test_pair.py
@@ -0,0 +1,15 @@
+from nose.tools import eq_
+
+from deeprank.models.pair import Pair
+
+
+def test_order_independency():
+    # test comparing:
+    pair1 = Pair(1, 2)
+    pair2 = Pair(2, 1)
+    eq_(pair1, pair2)
+
+    # test hashing:
+    d = {pair1: 1}
+    d[pair2] = 2
+    eq_(d[pair1], 2)

From a34cb9eb4f1abd4d5d2e37e67837071b7dd3346a Mon Sep 17 00:00:00 2001
From: Coos Baakman <coos.baakman@radboudumc.nl>
Date: Thu, 1 Apr 2021 14:05:48 +0000
Subject: [PATCH 6/6] test uniqueness of pair objects too!

---
 test/models/test_pair.py | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/test/models/test_pair.py b/test/models/test_pair.py
index b7d02036..fd7e1b25 100644
--- a/test/models/test_pair.py
+++ b/test/models/test_pair.py
@@ -1,15 +1,31 @@
-from nose.tools import eq_
+from nose.tools import eq_, ok_
 
 from deeprank.models.pair import Pair
 
 
 def test_order_independency():
-    # test comparing:
+    # These should be the same:
     pair1 = Pair(1, 2)
     pair2 = Pair(2, 1)
+
+    # test comparing:
     eq_(pair1, pair2)
 
     # test hashing:
     d = {pair1: 1}
     d[pair2] = 2
     eq_(d[pair1], 2)
+
+
+def test_uniqueness():
+    # These should be different:
+    pair1 = Pair(1, 2)
+    pair2 = Pair(1, 3)
+
+    # test comparing:
+    ok_(pair1 != pair2)
+
+    # test hashing:
+    d = {pair1: 1}
+    d[pair2] = 2
+    eq_(d[pair1], 1)