This repository has been archived by the owner on Nov 28, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 27
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
4 changed files
with
162 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
|
||
|
||
class Pair: | ||
""" A hashable, comparable object for any set of two inputs where order doesn't matter. | ||
Args: | ||
item1 (object): the pair's first object | ||
item2 (object): the pair's second object | ||
""" | ||
|
||
def __init__(self, item1, item2): | ||
self.item1 = item1 | ||
self.item2 = item2 | ||
|
||
def __hash__(self): | ||
# The hash should be solely based on the two paired items, not on their order. | ||
# So rearrange the two items and turn them into a hashable tuple. | ||
return hash(tuple(sorted([self.item1, self.item2]))) | ||
|
||
def __eq__(self, other): | ||
# Compare the pairs as sets, so the order doesn't matter. | ||
return {self.item1, self.item2} == {other.item1, other.item2} | ||
|
||
def __iter__(self): | ||
# Iterate over the two items in the pair. | ||
return iter([self.item1, self.item2]) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
import numpy | ||
import logging | ||
|
||
from deeprank.models.pair import Pair | ||
|
||
|
||
_log = logging.getLogger(__name__) | ||
|
||
def get_squared_distance(position1, position2): | ||
"""Get squared euclidean distance, this is computationally cheaper that the euclidean distance | ||
Args: | ||
position1 (numpy vector): first position | ||
position2 (numpy vector): second position | ||
Returns (float): the squared distance | ||
""" | ||
|
||
return numpy.sum(numpy.square(position1 - position2)) | ||
|
||
|
||
def get_distance(position1, position2): | ||
""" Get euclidean distance between two positions in space. | ||
Args: | ||
position1 (numpy vector): first position | ||
position2 (numpy vector): second position | ||
Returns (float): the distance | ||
""" | ||
|
||
return numpy.sqrt(get_squared_distance(position1, position2)) | ||
|
||
|
||
def get_residue_contact_atom_pairs(pdb2sql, chain_id, residue_number, max_interatomic_distance): | ||
""" Find interatomic contacts around a residue. | ||
Args: | ||
pdb2sql (pdb2sql object): the pdb structure that we're investigating | ||
chain_id (str): the chain identifier, where the residue is located | ||
residue_number (int): the residue number of interest within the chain | ||
max_interatomic_distance (float): maximum distance between two atoms | ||
Returns ([Pair(int, int)]): pairs of atom numbers that contact each other | ||
""" | ||
|
||
# Square the max distance, so that we can compare it to the squared euclidean distance between each atom pair. | ||
squared_max_interatomic_distance = numpy.square(max_interatomic_distance) | ||
|
||
# List all the atoms in the selected residue, take the coordinates while we're at it: | ||
residue_atoms = pdb2sql.get('rowID,x,y,z', chainID=chain_id, resSeq=residue_number) | ||
if len(residue_atoms) == 0: | ||
raise ValueError("No residue found in chain {} with number {}".format(chain_id, residue_number)) | ||
|
||
# List all the atoms in the pdb file, take the coordinates while we're at it: | ||
atoms = pdb2sql.get('rowID,x,y,z') | ||
|
||
# Iterate over all the atoms in the pdb, to find neighbours. | ||
contact_atom_pairs = set([]) | ||
for atom_nr, x, y, z in atoms: | ||
|
||
atom_position = numpy.array([x, y, z]) | ||
|
||
# Within the atom iteration, iterate over the atoms in the residue: | ||
for residue_atom_nr, residue_x, residue_y, residue_z in residue_atoms: | ||
|
||
residue_position = numpy.array([residue_x, residue_y, residue_z]) | ||
|
||
# Check that the two atom numbers are not the same and check their distance: | ||
if atom_nr != residue_atom_nr and \ | ||
get_squared_distance(atom_position, residue_position) < squared_max_interatomic_distance: | ||
|
||
contact_atom_pairs.add(Pair(residue_atom_nr, atom_nr)) | ||
|
||
|
||
return contact_atom_pairs |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
from nose.tools import eq_ | ||
|
||
from deeprank.models.pair import Pair | ||
|
||
|
||
def test_order_independency(): | ||
# test comparing: | ||
pair1 = Pair(1, 2) | ||
pair2 = Pair(2, 1) | ||
eq_(pair1, pair2) | ||
|
||
# test hashing: | ||
d = {pair1: 1} | ||
d[pair2] = 2 | ||
eq_(d[pair1], 2) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
import numpy | ||
import logging | ||
import pkg_resources | ||
import os | ||
|
||
from pdb2sql import pdb2sql | ||
from nose.tools import ok_ | ||
|
||
from deeprank.operate.pdb import get_residue_contact_atom_pairs | ||
|
||
|
||
_log = logging.getLogger(__name__) | ||
|
||
def test_residue_contact_atoms(): | ||
|
||
pdb_path = os.path.join(pkg_resources.resource_filename(__name__, ''), | ||
"../1AK4/native/1AK4.pdb") | ||
|
||
try: | ||
pdb = pdb2sql(pdb_path) | ||
|
||
contact_atom_pairs = get_residue_contact_atom_pairs(pdb, 'C', 145, 8.5) | ||
|
||
# List all the atoms in the pairs that we found: | ||
contact_atoms = set([]) | ||
for atom1, atom2 in contact_atom_pairs: | ||
contact_atoms.add(atom1) | ||
contact_atoms.add(atom2) | ||
|
||
# Ask pdb2sql for the residue identifiers & atom names: | ||
contact_atom_names = [tuple(x) for x in pdb.get("chainID,resSeq,name", rowID=list(contact_atoms))] | ||
finally: | ||
pdb._close() | ||
|
||
# Now, we need to verify that the function "get_residue_contact_atom_pairs" returned the right pairs. | ||
# We do so by selecting one close residue and one distant residue. | ||
|
||
neighbour = ('C', 144, 'CA') # this residue sits right next to the selected residue | ||
distant = ('B', 134, 'OE2') # this residue is very far away | ||
|
||
# Check that the close residue is present in the list and that the distant residue is absent. | ||
|
||
ok_(neighbour in contact_atom_names) | ||
|
||
ok_(distant not in contact_atom_names) |