Skip to content
This repository has been archived by the owner on Nov 28, 2023. It is now read-only.

Commit

Permalink
Merge a34cb9e into 0f6bb59
Browse files Browse the repository at this point in the history
  • Loading branch information
cbaakman committed Apr 1, 2021
2 parents 0f6bb59 + a34cb9e commit 49b7904
Show file tree
Hide file tree
Showing 4 changed files with 178 additions and 0 deletions.
26 changes: 26 additions & 0 deletions deeprank/models/pair.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@


class Pair:
""" A hashable, comparable object for any set of two inputs where order doesn't matter.
Args:
item1 (object): the pair's first object
item2 (object): the pair's second object
"""

def __init__(self, item1, item2):
self.item1 = item1
self.item2 = item2

def __hash__(self):
# The hash should be solely based on the two paired items, not on their order.
# So rearrange the two items and turn them into a hashable tuple.
return hash(tuple(sorted([self.item1, self.item2])))

def __eq__(self, other):
# Compare the pairs as sets, so the order doesn't matter.
return {self.item1, self.item2} == {other.item1, other.item2}

def __iter__(self):
# Iterate over the two items in the pair.
return iter([self.item1, self.item2])
76 changes: 76 additions & 0 deletions deeprank/operate/pdb.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
import numpy
import logging

from deeprank.models.pair import Pair


_log = logging.getLogger(__name__)

def get_squared_distance(position1, position2):
"""Get squared euclidean distance, this is computationally cheaper that the euclidean distance
Args:
position1 (numpy vector): first position
position2 (numpy vector): second position
Returns (float): the squared distance
"""

return numpy.sum(numpy.square(position1 - position2))


def get_distance(position1, position2):
""" Get euclidean distance between two positions in space.
Args:
position1 (numpy vector): first position
position2 (numpy vector): second position
Returns (float): the distance
"""

return numpy.sqrt(get_squared_distance(position1, position2))


def get_residue_contact_atom_pairs(pdb2sql, chain_id, residue_number, max_interatomic_distance):
""" Find interatomic contacts around a residue.
Args:
pdb2sql (pdb2sql object): the pdb structure that we're investigating
chain_id (str): the chain identifier, where the residue is located
residue_number (int): the residue number of interest within the chain
max_interatomic_distance (float): maximum distance between two atoms
Returns ([Pair(int, int)]): pairs of atom numbers that contact each other
"""

# Square the max distance, so that we can compare it to the squared euclidean distance between each atom pair.
squared_max_interatomic_distance = numpy.square(max_interatomic_distance)

# List all the atoms in the selected residue, take the coordinates while we're at it:
residue_atoms = pdb2sql.get('rowID,x,y,z', chainID=chain_id, resSeq=residue_number)
if len(residue_atoms) == 0:
raise ValueError("No residue found in chain {} with number {}".format(chain_id, residue_number))

# List all the atoms in the pdb file, take the coordinates while we're at it:
atoms = pdb2sql.get('rowID,x,y,z')

# Iterate over all the atoms in the pdb, to find neighbours.
contact_atom_pairs = set([])
for atom_nr, x, y, z in atoms:

atom_position = numpy.array([x, y, z])

# Within the atom iteration, iterate over the atoms in the residue:
for residue_atom_nr, residue_x, residue_y, residue_z in residue_atoms:

residue_position = numpy.array([residue_x, residue_y, residue_z])

# Check that the two atom numbers are not the same and check their distance:
if atom_nr != residue_atom_nr and \
get_squared_distance(atom_position, residue_position) < squared_max_interatomic_distance:

contact_atom_pairs.add(Pair(residue_atom_nr, atom_nr))


return contact_atom_pairs
31 changes: 31 additions & 0 deletions test/models/test_pair.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
from nose.tools import eq_, ok_

from deeprank.models.pair import Pair


def test_order_independency():
# These should be the same:
pair1 = Pair(1, 2)
pair2 = Pair(2, 1)

# test comparing:
eq_(pair1, pair2)

# test hashing:
d = {pair1: 1}
d[pair2] = 2
eq_(d[pair1], 2)


def test_uniqueness():
# These should be different:
pair1 = Pair(1, 2)
pair2 = Pair(1, 3)

# test comparing:
ok_(pair1 != pair2)

# test hashing:
d = {pair1: 1}
d[pair2] = 2
eq_(d[pair1], 1)
45 changes: 45 additions & 0 deletions test/operate/test_pdb.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import numpy
import logging
import pkg_resources
import os

from pdb2sql import pdb2sql
from nose.tools import ok_

from deeprank.operate.pdb import get_residue_contact_atom_pairs


_log = logging.getLogger(__name__)

def test_residue_contact_atoms():

pdb_path = os.path.join(pkg_resources.resource_filename(__name__, ''),
"../1AK4/native/1AK4.pdb")

try:
pdb = pdb2sql(pdb_path)

contact_atom_pairs = get_residue_contact_atom_pairs(pdb, 'C', 145, 8.5)

# List all the atoms in the pairs that we found:
contact_atoms = set([])
for atom1, atom2 in contact_atom_pairs:
contact_atoms.add(atom1)
contact_atoms.add(atom2)

# Ask pdb2sql for the residue identifiers & atom names:
contact_atom_names = [tuple(x) for x in pdb.get("chainID,resSeq,name", rowID=list(contact_atoms))]
finally:
pdb._close()

# Now, we need to verify that the function "get_residue_contact_atom_pairs" returned the right pairs.
# We do so by selecting one close residue and one distant residue.

neighbour = ('C', 144, 'CA') # this residue sits right next to the selected residue
distant = ('B', 134, 'OE2') # this residue is very far away

# Check that the close residue is present in the list and that the distant residue is absent.

ok_(neighbour in contact_atom_names)

ok_(distant not in contact_atom_names)

0 comments on commit 49b7904

Please sign in to comment.