Skip to content

Commit

Permalink
Added tests against biopython
Browse files Browse the repository at this point in the history
  • Loading branch information
LaurentRDC committed Nov 8, 2018
1 parent 794a6e1 commit 695ea64
Show file tree
Hide file tree
Showing 4 changed files with 94 additions and 8 deletions.
4 changes: 2 additions & 2 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ crystals
:alt: Supported Python versions
:target: https://pypi.python.org/pypi/crystals

``crystals`` is a library of data structure and algorithms to manipulate abstract crystals. ``crystals`` helps with reading crystallographic
files (like .cif and .pdb), provides access to atomic positions, and allows for space-group determination. Although ``crystals`` can be used on its own,
``crystals`` is a library of data structures and algorithms to manipulate abstract crystals in a Pythonic way. ``crystals`` helps with reading crystallographic
files (like .cif and .pdb), provides access to atomic positions and scattering utilities, and allows for symmetry determination. Although ``crystals`` can be used on its own,
it was made to be integrated into larger projects (like `scikit-ued <https://github.com/LaurentRDC/scikit-ued>`_).

Take a look at the `documentation <https://crystals.readthedocs.io/>`_ for more information.
Expand Down
2 changes: 1 addition & 1 deletion appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ environment:
TEST_CMD: "python -m unittest discover --verbose"
NUMPY_VERSION: "1.14"
CONDA_CHANNELS: "conda-forge"
CONDA_DEPENDENCIES: "numpy spglib pycifrw ase"
CONDA_DEPENDENCIES: "numpy spglib pycifrw ase biopython"
PIP_DEPENDENCIES: ""

matrix:
Expand Down
15 changes: 10 additions & 5 deletions crystals/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,18 +203,23 @@ def lattice_vectors(self):

def atoms(self):
    """
    Generate the atoms associated with a PDB structure, in fractional coordinates.
    These atoms form the asymmetric unit cell.

    Yields
    ------
    atom: Atom
    """
    # Lattice vectors have to be determined first (and only once) because
    # computing them moves the file pointer.
    lattice_vectors = self.lattice_vectors()

    # Rewind so that every record of the file is scanned.
    self._handle.seek(0)
    for line in self._handle:
        # Only ATOM and HETATM records describe atoms.
        if not line.startswith(('ATOM', 'HETATM')):
            continue

        # Fixed-width fields: coordinates live in characters 30-54 of the record.
        x, y, z = float(line[30:38]), float(line[38:46]), float(line[46:54])

        # The element symbol may be space-padded; normalize it to title case
        # (e.g. 'FE' -> 'Fe').
        element = line[76:78].replace(' ', '').title()

        fractional_coordinates = frac_coords(np.array([x, y, z]), lattice_vectors)
        yield Atom(element=element, coords=fractional_coordinates)

def symmetry_operators(self):
"""
Expand Down
81 changes: 81 additions & 0 deletions tests/test_parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@
import socket
import tempfile
import unittest
from collections import Counter, namedtuple
from contextlib import suppress
from pathlib import Path
from warnings import catch_warnings
from warnings import filterwarnings

import numpy as np
Expand All @@ -13,10 +15,21 @@
from crystals import CIFParser
from crystals import Crystal
from crystals import PDBParser
from crystals import frac_coords
from crystals.affine import transform
from crystals.parsers import STRUCTURE_CACHE
from crystals.spg_data import Hall2Number

try:
import Bio.PDB as biopdb
except ImportError:
WITH_BIOPYTHON = False
else:
WITH_BIOPYTHON = True

# Used to compare crystals.Atom instances and Bio.PDB.Atom instances.
# Both kinds of atoms are reduced to a plain, hashable (element, coords) record
# so that they can be collected in sets and compared directly.
GenericAtom = namedtuple('GenericAtom', ['element', 'coords'])

# Silence every UserWarning for the whole test module (applied at import time).
filterwarnings('ignore', category = UserWarning)

def connection_available():
Expand Down Expand Up @@ -57,6 +70,74 @@ def test_default_download_dir(self):
self.assertTrue(filename.exists())
self.assertEqual(filename.parent, STRUCTURE_CACHE)

@unittest.skipUnless(WITH_BIOPYTHON, 'Biopython is not installed/importable.')
@unittest.skipUnless(connection_available(), "Internet connection is required.")
class TestPDBParserAgainstBioPython(unittest.TestCase):
    """ Cross-check crystals.PDBParser against the Bio.PDB.PDBParser implementation. """

    # Each test will be performed on the following structures
    test_ids = ('1fbb', '1fat', '1gzx')

    def setUp(self):
        """ Create a fresh Bio.PDB file retriever and parser for each test. """
        self.pdb_list = biopdb.PDBList()
        self.biopdb_parser = biopdb.PDBParser()

    def test_chemical_composition(self):
        """ Test crystals.PDBParser returns the same chemical composition as the Bio.PDB.PDBParser
        implementation, i.e. the same elements in the right proportions. """
        with catch_warnings():
            filterwarnings('ignore', category=biopdb.PDBExceptions.PDBConstructionWarning)
            with tempfile.TemporaryDirectory() as temp_dir:
                for protein_id in self.test_ids:
                    with self.subTest(f'Protein ID: {protein_id}'):
                        with PDBParser(protein_id, download_dir=temp_dir) as parser:
                            fname = self.pdb_list.retrieve_pdb_file(protein_id, pdir=temp_dir, file_format='pdb')
                            structure = self.biopdb_parser.get_structure(protein_id, fname)

                            # Note: Bio.PDB atoms store element as uppercase strings.
                            # Thus, they must be changed to titlecase
                            crystals_chemical_composition = Counter(atm.element for atm in parser.atoms())
                            biopdb_chemical_composition = Counter(
                                atm.element.title() for atm in structure.get_atoms()
                            )

                            self.assertDictEqual(biopdb_chemical_composition,
                                                 crystals_chemical_composition)

    # NOTE(review): this test is skipped without a stated reason -- TODO document why.
    @unittest.skip('')
    def test_atomic_positions(self):
        """ Test crystals.PDBParser returns atoms in the same position as the Bio.PDB.PDBParser implementation """
        with catch_warnings():
            filterwarnings('ignore', category=biopdb.PDBExceptions.PDBConstructionWarning)

            with tempfile.TemporaryDirectory() as temp_dir:
                # Use the class-wide list of structures so that both tests stay in sync
                for protein_id in self.test_ids:
                    with self.subTest(f'Protein ID: {protein_id}'):
                        with PDBParser(protein_id, download_dir=temp_dir) as parser:
                            fname = self.pdb_list.retrieve_pdb_file(protein_id, pdir=temp_dir, file_format='pdb')

                            # The lattice vectors are the same for every atom: compute them once
                            lattice_vectors = parser.lattice_vectors()

                            biopdb_atoms = self.biopdb_parser.get_structure(protein_id, fname).get_atoms()
                            crystals_atoms = parser.atoms()

                            # To compare atom positions, we build "generic" atoms (just tuples (elem, coords))
                            # Note: Bio.PDB atoms store element as uppercase strings. Thus, they must be
                            # changed to titlecase. Since numpy arrays are unhashable, they are converted
                            # to tuples. crystals.PDBParser returns atoms in fractional coordinates, so we
                            # must also do the same with Bio.PDB atoms
                            bio_pdb_generic_atoms = {
                                GenericAtom(
                                    atm.element.title(),
                                    tuple(np.round(frac_coords(atm.coord, lattice_vectors), 3)),
                                )
                                for atm in biopdb_atoms
                            }

                            crystals_generic_atoms = {
                                GenericAtom(atm.element, tuple(np.round(atm.coords, 3)))
                                for atm in crystals_atoms
                            }

                            self.assertEqual(bio_pdb_generic_atoms,
                                             crystals_generic_atoms)


class TestCIFParser(unittest.TestCase):
""" Test the CIFParser on all CIF files stored herein """

Expand Down

0 comments on commit 695ea64

Please sign in to comment.