Skip to content
This repository has been archived by the owner on Nov 28, 2023. It is now read-only.

Commit

Permalink
store atom data in objects
Browse files Browse the repository at this point in the history
  • Loading branch information
Coos Baakman committed Apr 6, 2021
1 parent a34cb9e commit 075c39b
Show file tree
Hide file tree
Showing 4 changed files with 126 additions and 23 deletions.
22 changes: 22 additions & 0 deletions deeprank/models/atom.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@


class Atom:
def __init__(self, id_, position, chain_id, name, residue=None):
self.id = id_
self.name = name
self.chain_id = chain_id
self.position = position
self.residue = residue

def __hash__(self):
return hash(self.id)

def __eq__(self, other):
return self.id == other.id

def __lt__(self, other):
return self.id < other.id

def __repr__(self):
return "Atom {} ({} {} {}) at {}".format(self.id, self.chain_id, self.residue, self.name, self.position)

24 changes: 24 additions & 0 deletions deeprank/models/residue.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@


class Residue:
def __init__(self, number, name, atoms=[]):
self.number = number
self.name = name
self.atoms = atoms

@property
def chain_id(self):
chain_ids = set([atom.chain_id for atom in self.atoms])
if len(chain_ids) > 1:
raise ValueError("residue {} {} contains atoms of different chains: {}".format(self.name, self.number, self.atoms))

return list(chain_ids)[0]

def __hash__(self):
return hash((self.chain_id, self.number))

def __eq__(self, other):
return self.chain_id == other.chain_id and self.number == other.number

def __repr__(self):
return "Residue {} {}".format(self.name, self.number)
62 changes: 48 additions & 14 deletions deeprank/operate/pdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
import logging

from deeprank.models.pair import Pair
from deeprank.models.atom import Atom
from deeprank.models.residue import Residue


_log = logging.getLogger(__name__)
Expand Down Expand Up @@ -32,6 +34,41 @@ def get_distance(position1, position2):
return numpy.sqrt(get_squared_distance(position1, position2))


def get_atoms(pdb2sql):
""" Builds a list of atom objects, according to the contents of the pdb file.
Args:
pdb2sql (pdb2sql object): the pdb structure that we're investigating
Returns ([Atom]): all the atoms in the pdb file.
"""

# This is a working dictionary of residues, identified by their chains and numbers.
residues = {}

# This is the list of atom objects, that will be returned.
atoms = []

# Iterate over the atom output from pdb2sql
for x, y, z, atom_number, atom_name, chain_id, residue_number, residue_name in \
pdb2sql.get("x,y,z,rowID,name,chainID,resSeq,resName"):

# Make sure that the residue is in the working directory:
residue_id = (chain_id, residue_number)
if residue_id not in residues:
residues[residue_id] = Residue(residue_number, residue_name)

# Turn the x,y,z into a vector:
atom_position = numpy.array([x, y, z])

# Create the atom object and link it to the residue:
atom = Atom(atom_number, atom_position, chain_id, atom_name, residues[residue_id])
residues[residue_id].atoms.append(atom)
atoms.append(atom)

return atoms


def get_residue_contact_atom_pairs(pdb2sql, chain_id, residue_number, max_interatomic_distance):
""" Find interatomic contacts around a residue.
Expand All @@ -47,30 +84,27 @@ def get_residue_contact_atom_pairs(pdb2sql, chain_id, residue_number, max_intera
# Square the max distance, so that we can compare it to the squared euclidean distance between each atom pair.
squared_max_interatomic_distance = numpy.square(max_interatomic_distance)

# get all the atoms in the pdb file:
atoms = get_atoms(pdb2sql)

# List all the atoms in the selected residue, take the coordinates while we're at it:
residue_atoms = pdb2sql.get('rowID,x,y,z', chainID=chain_id, resSeq=residue_number)
residue_atoms = [atom for atom in atoms if atom.chain_id == chain_id and
atom.residue.number == residue_number]
if len(residue_atoms) == 0:
raise ValueError("No residue found in chain {} with number {}".format(chain_id, residue_number))

# List all the atoms in the pdb file, take the coordinates while we're at it:
atoms = pdb2sql.get('rowID,x,y,z')
raise ValueError("No atoms found in chain {} with residue number {}".format(chain_id, residue_number))

# Iterate over all the atoms in the pdb, to find neighbours.
contact_atom_pairs = set([])
for atom_nr, x, y, z in atoms:

atom_position = numpy.array([x, y, z])
for atom in atoms:

# Within the atom iteration, iterate over the atoms in the residue:
for residue_atom_nr, residue_x, residue_y, residue_z in residue_atoms:

residue_position = numpy.array([residue_x, residue_y, residue_z])
for residue_atom in residue_atoms:

# Check that the two atom numbers are not the same and check their distance:
if atom_nr != residue_atom_nr and \
get_squared_distance(atom_position, residue_position) < squared_max_interatomic_distance:
if atom != residue_atom and \
get_squared_distance(atom.position, residue_atom.position) < squared_max_interatomic_distance:

contact_atom_pairs.add(Pair(residue_atom_nr, atom_nr))
contact_atom_pairs.add(Pair(residue_atom, atom))


return contact_atom_pairs
41 changes: 32 additions & 9 deletions test/operate/test_pdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,35 @@
from pdb2sql import pdb2sql
from nose.tools import ok_

from deeprank.operate.pdb import get_residue_contact_atom_pairs
from deeprank.operate.pdb import get_residue_contact_atom_pairs, get_atoms


_log = logging.getLogger(__name__)


def test_get_atoms():
pdb_path = os.path.join(pkg_resources.resource_filename(__name__, ''),
"../1AK4/native/1AK4.pdb")

try:
pdb = pdb2sql(pdb_path)

atoms = get_atoms(pdb)

ok_(len(atoms) > 0)
finally:
pdb._close()


def _find_atom(atoms, chain_id, residue_number, name):
matching_atoms = [atom for atom in atoms if atom.chain_id == chain_id and
atom.name == name and atom.residue.number == residue_number]

assert len(matching_atoms) == 1, "Expected exacly one matching atom, got {}".format(len(matching_atoms))

return matching_atoms[0]


def test_residue_contact_atoms():

pdb_path = os.path.join(pkg_resources.resource_filename(__name__, ''),
Expand All @@ -19,27 +43,26 @@ def test_residue_contact_atoms():
try:
pdb = pdb2sql(pdb_path)

contact_atom_pairs = get_residue_contact_atom_pairs(pdb, 'C', 145, 8.5)
atoms = get_atoms(pdb)

contact_atom_pairs = get_residue_contact_atom_pairs(pdb, 'D', 145, 8.5)

# List all the atoms in the pairs that we found:
contact_atoms = set([])
for atom1, atom2 in contact_atom_pairs:
contact_atoms.add(atom1)
contact_atoms.add(atom2)

# Ask pdb2sql for the residue identifiers & atom names:
contact_atom_names = [tuple(x) for x in pdb.get("chainID,resSeq,name", rowID=list(contact_atoms))]
finally:
pdb._close()

# Now, we need to verify that the function "get_residue_contact_atom_pairs" returned the right pairs.
# We do so by selecting one close residue and one distant residue.

neighbour = ('C', 144, 'CA') # this residue sits right next to the selected residue
distant = ('B', 134, 'OE2') # this residue is very far away
neighbour = _find_atom(atoms, 'D', 144, 'CA') # this residue sits right next to the selected residue
distant = _find_atom(atoms, 'C', 134, 'OE2') # this residue is very far away

# Check that the close residue is present in the list and that the distant residue is absent.

ok_(neighbour in contact_atom_names)
ok_(neighbour in contact_atoms)

ok_(distant not in contact_atom_names)
ok_(distant not in contact_atoms)

0 comments on commit 075c39b

Please sign in to comment.