This repository has been archived by the owner on Nov 28, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 27
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
6 changed files
with
287 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
|
||
|
||
class Atom: | ||
def __init__(self, id_, position, chain_id, name, residue=None): | ||
self.id = id_ | ||
self.name = name | ||
self.chain_id = chain_id | ||
self.position = position | ||
self.residue = residue | ||
|
||
def __hash__(self): | ||
return hash(self.id) | ||
|
||
def __eq__(self, other): | ||
return self.id == other.id | ||
|
||
def __lt__(self, other): | ||
return self.id < other.id | ||
|
||
def __repr__(self): | ||
return "Atom {} ({}) from {} {} at {}".format(self.id, self.chain_id, self.residue, self.name, self.position) | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
|
||
|
||
class Pair: | ||
""" A hashable, comparable object for any set of two inputs where order doesn't matter. | ||
Args: | ||
item1 (object): the pair's first object | ||
item2 (object): the pair's second object | ||
""" | ||
|
||
def __init__(self, item1, item2): | ||
self.item1 = item1 | ||
self.item2 = item2 | ||
|
||
def __hash__(self): | ||
# The hash should be solely based on the two paired items, not on their order. | ||
# So rearrange the two items and turn them into a hashable tuple. | ||
return hash(tuple(sorted([self.item1, self.item2]))) | ||
|
||
def __eq__(self, other): | ||
# Compare the pairs as sets, so the order doesn't matter. | ||
return {self.item1, self.item2} == {other.item1, other.item2} | ||
|
||
def __iter__(self): | ||
# Iterate over the two items in the pair. | ||
return iter([self.item1, self.item2]) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
|
||
|
||
class Residue: | ||
def __init__(self, number, name): | ||
self.number = number | ||
self.name = name | ||
self.atoms = [] | ||
|
||
@property | ||
def chain_id(self): | ||
chain_ids = set([atom.chain_id for atom in self.atoms]) | ||
if len(chain_ids) > 1: | ||
raise ValueError("residue {} {} contains atoms of different chains: {}".format(self.name, self.number, self.atoms)) | ||
|
||
return list(chain_ids)[0] | ||
|
||
def __hash__(self): | ||
return hash((self.chain_id, self.number)) | ||
|
||
def __eq__(self, other): | ||
return self.chain_id == other.chain_id and self.number == other.number | ||
|
||
def __repr__(self): | ||
return "Residue {} {}".format(self.name, self.number) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
import numpy | ||
import logging | ||
|
||
from deeprank.models.pair import Pair | ||
from deeprank.models.atom import Atom | ||
from deeprank.models.residue import Residue | ||
|
||
|
||
_log = logging.getLogger(__name__) | ||
|
||
|
||
def get_distance(position1, position2): | ||
""" Get euclidean distance between two positions in space. | ||
Args: | ||
position1 (numpy vector): first position | ||
position2 (numpy vector): second position | ||
Returns (float): the distance | ||
""" | ||
|
||
return numpy.sqrt(numpy.sum(numpy.square(position1 - position2))) | ||
|
||
|
||
def get_atoms(pdb2sql): | ||
""" Builds a list of atom objects, according to the contents of the pdb file. | ||
Args: | ||
pdb2sql (pdb2sql object): the pdb structure that we're investigating | ||
Returns ([Atom]): all the atoms in the pdb file. | ||
""" | ||
|
||
# This is a working dictionary of residues, identified by their chains and numbers. | ||
residues = {} | ||
|
||
# This is the list of atom objects, that will be returned. | ||
atoms = [] | ||
|
||
# Iterate over the atom output from pdb2sql | ||
for x, y, z, atom_number, atom_name, chain_id, residue_number, residue_name in \ | ||
pdb2sql.get("x,y,z,rowID,name,chainID,resSeq,resName"): | ||
|
||
# Make sure that the residue is in the working directory: | ||
residue_id = (chain_id, residue_number) | ||
if residue_id not in residues: | ||
residues[residue_id] = Residue(residue_number, residue_name) | ||
|
||
# Turn the x,y,z into a vector: | ||
atom_position = numpy.array([x, y, z]) | ||
|
||
# Create the atom object and link it to the residue: | ||
atom = Atom(atom_number, atom_position, chain_id, atom_name, residues[residue_id]) | ||
residues[residue_id].atoms.append(atom) | ||
atoms.append(atom) | ||
|
||
return atoms | ||
|
||
|
||
def get_residue_contact_atom_pairs(pdb2sql, chain_id, residue_number, max_interatomic_distance): | ||
""" Find interatomic contacts around a residue. | ||
Args: | ||
pdb2sql (pdb2sql object): the pdb structure that we're investigating | ||
chain_id (str): the chain identifier, where the residue is located | ||
residue_number (int): the residue number of interest within the chain | ||
max_interatomic_distance (float): maximum distance between two atoms | ||
Returns ([Pair(int, int)]): pairs of atom numbers that contact each other | ||
""" | ||
|
||
# get all the atoms in the pdb file: | ||
atoms = get_atoms(pdb2sql) | ||
|
||
# List all the atoms in the selected residue, take the coordinates while we're at it: | ||
residue_atoms = [atom for atom in atoms if atom.chain_id == chain_id and | ||
atom.residue.number == residue_number] | ||
if len(residue_atoms) == 0: | ||
raise ValueError("No atoms found in chain {} with residue number {}".format(chain_id, residue_number)) | ||
|
||
# Iterate over all the atoms in the pdb, to find neighbours. | ||
contact_atom_pairs = set([]) | ||
for atom in atoms: | ||
|
||
# Check that the atom is not one of the residue's own atoms: | ||
if atom.chain_id == chain_id and atom.residue.number == residue_number: | ||
continue | ||
|
||
# Within the atom iteration, iterate over the atoms in the residue: | ||
for residue_atom in residue_atoms: | ||
|
||
# Check that the atom is close enough: | ||
if get_distance(atom.position, residue_atom.position) < max_interatomic_distance: | ||
|
||
contact_atom_pairs.add(Pair(residue_atom, atom)) | ||
|
||
|
||
return contact_atom_pairs |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
from nose.tools import eq_, ok_ | ||
|
||
from deeprank.models.pair import Pair | ||
|
||
|
||
def test_order_independency(): | ||
# These should be the same: | ||
pair1 = Pair(1, 2) | ||
pair2 = Pair(2, 1) | ||
|
||
# test comparing: | ||
eq_(pair1, pair2) | ||
|
||
# test hashing: | ||
d = {pair1: 1} | ||
d[pair2] = 2 | ||
eq_(d[pair1], 2) | ||
|
||
|
||
def test_uniqueness(): | ||
# These should be different: | ||
pair1 = Pair(1, 2) | ||
pair2 = Pair(1, 3) | ||
|
||
# test comparing: | ||
ok_(pair1 != pair2) | ||
|
||
# test hashing: | ||
d = {pair1: 1} | ||
d[pair2] = 2 | ||
eq_(d[pair1], 1) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
import numpy | ||
import logging | ||
import pkg_resources | ||
import os | ||
|
||
from pdb2sql import pdb2sql | ||
from nose.tools import ok_ | ||
|
||
from deeprank.operate.pdb import get_residue_contact_atom_pairs, get_atoms | ||
|
||
|
||
_log = logging.getLogger(__name__) | ||
|
||
|
||
def test_get_atoms(): | ||
pdb_path = os.path.join(pkg_resources.resource_filename(__name__, ''), | ||
"../1AK4/native/1AK4.pdb") | ||
|
||
try: | ||
pdb = pdb2sql(pdb_path) | ||
|
||
atoms = get_atoms(pdb) | ||
|
||
ok_(len(atoms) > 0) | ||
finally: | ||
pdb._close() | ||
|
||
|
||
def _find_atom(atoms, chain_id, residue_number, name): | ||
matching_atoms = [atom for atom in atoms if atom.chain_id == chain_id and | ||
atom.name == name and atom.residue.number == residue_number] | ||
|
||
assert len(matching_atoms) == 1, "Expected exacly one matching atom, got {}".format(len(matching_atoms)) | ||
|
||
return matching_atoms[0] | ||
|
||
|
||
def _find_residue(atoms, chain_id, residue_number): | ||
matching_atoms = [atom for atom in atoms if atom.chain_id == chain_id and | ||
atom.residue.number == residue_number] | ||
|
||
assert len(matching_atoms) > 0, "Expected at least one matching atom, got zero" | ||
|
||
return matching_atoms[0].residue | ||
|
||
|
||
def test_residue_contact_atoms(): | ||
|
||
pdb_path = os.path.join(pkg_resources.resource_filename(__name__, ''), | ||
"../1AK4/native/1AK4.pdb") | ||
chain_id = 'D' | ||
residue_number = 145 | ||
|
||
try: | ||
pdb = pdb2sql(pdb_path) | ||
|
||
atoms = get_atoms(pdb) | ||
query_residue = _find_residue(atoms, chain_id, residue_number) | ||
|
||
contact_atom_pairs = get_residue_contact_atom_pairs(pdb, chain_id, residue_number, 8.5) | ||
|
||
# List all the atoms in the pairs that we found: | ||
contact_atoms = set([]) | ||
for atom1, atom2 in contact_atom_pairs: | ||
contact_atoms.add(atom1) | ||
contact_atoms.add(atom2) | ||
|
||
# Be sure that this atom pair does not pair two atoms of the same residue: | ||
ok_(atom1.residue != atom2.residue) | ||
|
||
# Be sure that one of the atoms in the pair is from the query residue: | ||
ok_(atom1.residue == query_residue or atom2.residue == query_residue) | ||
finally: | ||
pdb._close() | ||
|
||
# Now, we need to verify that the function "get_residue_contact_atom_pairs" returned the right pairs. | ||
# We do so by selecting one close residue and one distant residue. | ||
|
||
neighbour = _find_atom(atoms, 'D', 144, 'CA') # this residue sits right next to the selected residue | ||
distant = _find_atom(atoms, 'C', 134, 'OE2') # this residue is very far away | ||
|
||
# Check that the close residue is present in the list and that the distant residue is absent. | ||
# Also, the function should not pair a residue with itself. | ||
|
||
ok_(neighbour in contact_atoms) | ||
ok_(distant not in contact_atoms) |