From 848a39440294683af42f0ba0ac8de6ab13881580 Mon Sep 17 00:00:00 2001 From: Coos Baakman Date: Thu, 1 Apr 2021 12:11:50 +0000 Subject: [PATCH 1/6] add an operating function to find the surrounding atoms around a residue --- deeprank/models/pair.py | 17 +++++++++++++++++ deeprank/operate/pdb.py | 41 ++++++++++++++++++++++++++++++++++++++++ test/operate/test_pdb.py | 34 +++++++++++++++++++++++++++++++++ 3 files changed, 92 insertions(+) create mode 100644 deeprank/models/pair.py create mode 100644 deeprank/operate/pdb.py create mode 100644 test/operate/test_pdb.py diff --git a/deeprank/models/pair.py b/deeprank/models/pair.py new file mode 100644 index 00000000..71cd941b --- /dev/null +++ b/deeprank/models/pair.py @@ -0,0 +1,17 @@ + + +class Pair: + "a hashable, comparable object for any set of two inputs where order doesn't matter" + + def __init__(self, item1, item2): + self.item1 = item1 + self.item2 = item2 + + def __hash__(self): + return hash((self.item1, self.item2)) + + def __eq__(self, other): + return {self.item1, self.item2} == {other.item1, other.item2} + + def __iter__(self): + return iter([self.item1, self.item2]) diff --git a/deeprank/operate/pdb.py b/deeprank/operate/pdb.py new file mode 100644 index 00000000..9c9944a2 --- /dev/null +++ b/deeprank/operate/pdb.py @@ -0,0 +1,41 @@ +import numpy +import logging + +from deeprank.models.pair import Pair + + +_log = logging.getLogger(__name__) + +def get_squared_distance(position1, position2): + return numpy.sum(numpy.square(position1 - position2)) + + +def get_distance(position1, position2): + return numpy.sqrt(get_squared_distance(position1, position2)) + + +def get_residue_contact_atom_pairs(pdb2sql, chain_id, residue_number, max_interatomic_distance): + squared_max_interatomic_distance = numpy.square(max_interatomic_distance) + + residue_atoms = pdb2sql.get('rowID,x,y,z', chainID=chain_id, resSeq=residue_number) + if len(residue_atoms) == 0: + raise ValueError("No residue found in chain {} with number {}".format(chain_id, residue_number)) + + atoms = pdb2sql.get('rowID,x,y,z') + + contact_atom_pairs = set([]) + + for atom_nr, x, y, z in atoms: + + atom_position = numpy.array([x, y, z]) + + for residue_atom_nr, residue_x, residue_y, residue_z in residue_atoms: + + residue_position = numpy.array([residue_x, residue_y, residue_z]) + + if get_squared_distance(atom_position, residue_position) < squared_max_interatomic_distance: + + contact_atom_pairs.add(Pair(residue_atom_nr, atom_nr)) + + + return contact_atom_pairs diff --git a/test/operate/test_pdb.py b/test/operate/test_pdb.py new file mode 100644 index 00000000..e66324cd --- /dev/null +++ b/test/operate/test_pdb.py @@ -0,0 +1,34 @@ +import numpy +import logging + +from pdb2sql import pdb2sql +from nose.tools import ok_ + +from deeprank.operate.pdb import get_residue_contact_atom_pairs + + +_log = logging.getLogger(__name__) + +def test_residue_contact_atoms(): + pdb_path = "test/1AK4/native/1AK4.pdb" + + try: + pdb = pdb2sql(pdb_path) + + contact_atom_pairs = get_residue_contact_atom_pairs(pdb, 'C', 145, 8.5) + + contact_atoms = set([]) + for atom1, atom2 in contact_atom_pairs: + contact_atoms.add(atom1) + contact_atoms.add(atom2) + + contact_atom_names = [tuple(x) for x in pdb.get("chainID,resSeq,name", rowID=list(contact_atoms))] + finally: + pdb._close() + + neighbour = ('C', 144, 'CA') + distant = ('B', 134, 'OE2') + + ok_(neighbour in contact_atom_names) + + ok_(distant not in contact_atom_names) From 63f53a6e1ecf87d82986f3f83d66f3ac0e63be42 Mon Sep 17 00:00:00 2001 From: Coos Baakman Date: Thu, 1 Apr 2021 12:28:00 +0000 Subject: [PATCH 2/6] pathname fix to make the test work from anywhere --- test/operate/test_pdb.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/test/operate/test_pdb.py b/test/operate/test_pdb.py index e66324cd..90a3fc64 100644 --- a/test/operate/test_pdb.py +++ b/test/operate/test_pdb.py @@ -1,5 +1,7 @@ import numpy import logging +import pkg_resources +import os from pdb2sql import pdb2sql from nose.tools import ok_ @@ -10,7 +12,9 @@ _log = logging.getLogger(__name__) def test_residue_contact_atoms(): - pdb_path = "test/1AK4/native/1AK4.pdb" + + pdb_path = os.path.join(pkg_resources.resource_filename(__name__, ''), + "../1AK4/native/1AK4.pdb") try: pdb = pdb2sql(pdb_path) From 31907e31a690918c9870728637ebad886882d6a3 Mon Sep 17 00:00:00 2001 From: Coos Baakman Date: Thu, 1 Apr 2021 13:40:09 +0000 Subject: [PATCH 3/6] explain the test with comments --- test/operate/test_pdb.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/test/operate/test_pdb.py b/test/operate/test_pdb.py index 90a3fc64..b8c1f755 100644 --- a/test/operate/test_pdb.py +++ b/test/operate/test_pdb.py @@ -21,17 +21,24 @@ def test_residue_contact_atoms(): contact_atom_pairs = get_residue_contact_atom_pairs(pdb, 'C', 145, 8.5) + # List all the atoms in the pairs that we found: contact_atoms = set([]) for atom1, atom2 in contact_atom_pairs: contact_atoms.add(atom1) contact_atoms.add(atom2) + # Ask pdb2sql for the residue identifiers & atom names: contact_atom_names = [tuple(x) for x in pdb.get("chainID,resSeq,name", rowID=list(contact_atoms))] finally: pdb._close() - neighbour = ('C', 144, 'CA') - distant = ('B', 134, 'OE2') + # Now, we need to verify that the function "get_residue_contact_atom_pairs" returned the right pairs. + # We do so by selecting one close residue and one distant residue. + + neighbour = ('C', 144, 'CA') # this residue sits right next to the selected residue + distant = ('B', 134, 'OE2') # this residue is very far away + + # Check that the close residue is present in the list and that the distant residue is absent. ok_(neighbour in contact_atom_names) From f330c0599edd0648fcdd09bff8ab2923a272a897 Mon Sep 17 00:00:00 2001 From: Coos Baakman Date: Thu, 1 Apr 2021 13:52:31 +0000 Subject: [PATCH 4/6] added some comments and do an extra check to prevent pairing an atom with itself --- deeprank/operate/pdb.py | 39 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 37 insertions(+), 2 deletions(-) diff --git a/deeprank/operate/pdb.py b/deeprank/operate/pdb.py index 9c9944a2..6e1e11bf 100644 --- a/deeprank/operate/pdb.py +++ b/deeprank/operate/pdb.py @@ -7,33 +7,68 @@ _log = logging.getLogger(__name__) def get_squared_distance(position1, position2): + """Get squared euclidean distance, this is computationally cheaper that the euclidean distance + + Args: + position1 (numpy vector): first position + position2 (numpy vector): second position + + Returns (float): the squared distance + """ + return numpy.sum(numpy.square(position1 - position2)) def get_distance(position1, position2): + """ Get euclidean distance between two positions in space. + + Args: + position1 (numpy vector): first position + position2 (numpy vector): second position + + Returns (float): the distance + """ + return numpy.sqrt(get_squared_distance(position1, position2)) def get_residue_contact_atom_pairs(pdb2sql, chain_id, residue_number, max_interatomic_distance): + """ Find interatomic contacts around a residue. + + Args: + pdb2sql (pdb2sql object): the pdb structure that we're investigating + chain_id (str): the chain identifier, where the residue is located + residue_number (int): the residue number of interest within the chain + max_interatomic_distance (float): maximum distance between two atoms + + Returns ([Pair(int, int)]): pairs of atom numbers that contact each other + """ + + # Square the max distance, so that we can compare it to the squared euclidean distance between each atom pair. squared_max_interatomic_distance = numpy.square(max_interatomic_distance) + # List all the atoms in the selected residue, take the coordinates while we're at it: residue_atoms = pdb2sql.get('rowID,x,y,z', chainID=chain_id, resSeq=residue_number) if len(residue_atoms) == 0: raise ValueError("No residue found in chain {} with number {}".format(chain_id, residue_number)) + # List all the atoms in the pdb file, take the coordinates while we're at it: atoms = pdb2sql.get('rowID,x,y,z') + # Iterate over all the atoms in the pdb, to find neighbours. contact_atom_pairs = set([]) - for atom_nr, x, y, z in atoms: atom_position = numpy.array([x, y, z]) + # Within the atom iteration, iterate over the atoms in the residue: for residue_atom_nr, residue_x, residue_y, residue_z in residue_atoms: residue_position = numpy.array([residue_x, residue_y, residue_z]) - if get_squared_distance(atom_position, residue_position) < squared_max_interatomic_distance: + # Check that the two atom numbers are not the same and check their distance: + if atom_nr != residue_atom_nr and \ + get_squared_distance(atom_position, residue_position) < squared_max_interatomic_distance: contact_atom_pairs.add(Pair(residue_atom_nr, atom_nr)) From 8d9cb1b8020e9c880a9ad369dd9eca42c3145cdf Mon Sep 17 00:00:00 2001 From: Coos Baakman Date: Thu, 1 Apr 2021 14:00:42 +0000 Subject: [PATCH 5/6] add comments to the pair class and test that it is truly order independent --- deeprank/models/pair.py | 13 +++++++++++-- test/models/test_pair.py | 15 +++++++++++++++ 2 files changed, 26 insertions(+), 2 deletions(-) create mode 100644 test/models/test_pair.py diff --git a/deeprank/models/pair.py b/deeprank/models/pair.py index 71cd941b..69501def 100644 --- a/deeprank/models/pair.py +++ b/deeprank/models/pair.py @@ -1,17 +1,26 @@ class Pair: - "a hashable, comparable object for any set of two inputs where order doesn't matter" + """ A hashable, comparable object for any set of two inputs where order doesn't matter. + + Args: + item1 (object): the pair's first object + item2 (object): the pair's second object + """ def __init__(self, item1, item2): self.item1 = item1 self.item2 = item2 def __hash__(self): - return hash((self.item1, self.item2)) + # The hash should be solely based on the two paired items, not on their order. + # So rearrange the two items and turn them into a hashable tuple. + return hash(tuple(sorted([self.item1, self.item2]))) def __eq__(self, other): + # Compare the pairs as sets, so the order doesn't matter. return {self.item1, self.item2} == {other.item1, other.item2} def __iter__(self): + # Iterate over the two items in the pair. return iter([self.item1, self.item2]) diff --git a/test/models/test_pair.py b/test/models/test_pair.py new file mode 100644 index 00000000..b7d02036 --- /dev/null +++ b/test/models/test_pair.py @@ -0,0 +1,15 @@ +from nose.tools import eq_ + +from deeprank.models.pair import Pair + + +def test_order_independency(): + # test comparing: + pair1 = Pair(1, 2) + pair2 = Pair(2, 1) + eq_(pair1, pair2) + + # test hashing: + d = {pair1: 1} + d[pair2] = 2 + eq_(d[pair1], 2) From a34cb9eb4f1abd4d5d2e37e67837071b7dd3346a Mon Sep 17 00:00:00 2001 From: Coos Baakman Date: Thu, 1 Apr 2021 14:05:48 +0000 Subject: [PATCH 6/6] test uniqueness of pair objects too! --- test/models/test_pair.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/test/models/test_pair.py b/test/models/test_pair.py index b7d02036..fd7e1b25 100644 --- a/test/models/test_pair.py +++ b/test/models/test_pair.py @@ -1,15 +1,31 @@ -from nose.tools import eq_ +from nose.tools import eq_, ok_ from deeprank.models.pair import Pair def test_order_independency(): - # test comparing: + # These should be the same: pair1 = Pair(1, 2) pair2 = Pair(2, 1) + + # test comparing: eq_(pair1, pair2) # test hashing: d = {pair1: 1} d[pair2] = 2 eq_(d[pair1], 2) + + +def test_uniqueness(): + # These should be different: + pair1 = Pair(1, 2) + pair2 = Pair(1, 3) + + # test comparing: + ok_(pair1 != pair2) + + # test hashing: + d = {pair1: 1} + d[pair2] = 2 + eq_(d[pair1], 1)