Skip to content
Permalink
Branch: master
Find file Copy path
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
4657 lines (3790 sloc) 160 KB
""" Classes to represent modified forms of DNA, RNA, and proteins
:Author: Jonathan Karr <karr@mssm.edu>
:Date: 2019-01-31
:Copyright: 2019, Karr Lab
:License: MIT
"""
from ruamel import yaml
from wc_utils.util.chem import EmpiricalFormula, get_major_micro_species, draw_molecule, OpenBabelUtils
import abc
import attrdict
import copy
import enum
import itertools
import lark
import openbabel
import os
import pkg_resources
import re
import urllib.parse
import warnings
import wc_utils.cache
# setup cache
cache_dir = os.path.expanduser('~/.cache/bpforms')
if not os.path.isdir(cache_dir):
os.makedirs(cache_dir)
cache = wc_utils.cache.Cache(directory=cache_dir)
class Identifier(object):
""" A identifier in a namespace for an external database
Attributes:
ns (:obj:`str`): namespace
id (:obj:`str`): id in namespace
"""
def __init__(self, ns, id):
"""
Args:
ns (:obj:`str`): namespace
id (:obj:`str`): id in namespace
"""
self.ns = ns
self.id = id
@property
def ns(self):
""" Get the namespace
Returns:
:obj:`str`: namespace
"""
return self._ns
@ns.setter
def ns(self, value):
""" Set the namespace
Args:
value (:obj:`str`): namespace
Raises:
:obj:`ValueError`: if the namespace is empty
"""
if not value:
raise ValueError('`ns` cannot be empty')
self._ns = value
@property
def id(self):
""" Get the id
Returns:
:obj:`str`: id
"""
return self._id
@id.setter
def id(self, value):
""" Set the id
Args:
value (:obj:`str`): id
Raises:
:obj:`ValueError`: if the id is empty
"""
if not value:
raise ValueError('`id` cannot be empty')
self._id = value
def __eq__(self, other):
""" Check if two identifiers are semantically equal
Args:
other (:obj:`Identifier`): another identifier
Returns:
:obj:`bool`: True, if the identifiers are semantically equal
"""
return self is other or (self.__class__ == other.__class__ and self.ns == other.ns and self.id == other.id)
def __hash__(self):
""" Generate a hash
Returns:
:obj:`int`: hash
"""
return hash((self.ns, self.id))
class IdentifierSet(set):
""" Set of identifiers """
def __init__(self, identifiers=None):
"""
Args:
identifiers (:obj:iterable of :obj:`Identifier`): iterable of identifiers
"""
super(IdentifierSet, self).__init__()
if identifiers is not None:
for identifier in identifiers:
self.add(identifier)
def add(self, identifier):
""" Add an identifier
Args:
identifier (:obj:`Identifier`): identifier
Raises:
:obj:`ValueError`: if the `identifier` is not an instance of `Indentifier`
"""
if not isinstance(identifier, Identifier):
raise ValueError('`identifier` must be an instance of `Indentifier`')
super(IdentifierSet, self).add(identifier)
def update(self, identifiers):
""" Add a set of identifiers
Args:
identifiers (iterable of :obj:`Identifier`): identifiers
"""
for identifier in identifiers:
self.add(identifier)
def symmetric_difference_update(self, other):
""" Remove common elements with other and add elements from other not in self
Args:
other (:obj:`IdentifierSet`): other set of identifiers
"""
if not isinstance(other, IdentifierSet):
other = IdentifierSet(other)
super(IdentifierSet, self).symmetric_difference_update(other)
class SynonymSet(set):
""" Set of synonyms """
def __init__(self, synonyms=None):
"""
Args:
synonyms (:obj:iterable of :obj:`str`): iterable of synonyms
Raises:
:obj:`ValueError`: if synonyms is not an iterable of string
"""
super(SynonymSet, self).__init__()
if isinstance(synonyms, str):
raise ValueError('synonyms must be an iterable of strings')
if synonyms is not None:
for synonym in synonyms:
self.add(synonym)
def add(self, synonym):
""" Add an synonym
Args:
synonym (:obj:`str`): synonym
Raises:
:obj:`ValueError`: if the `synonym` is not an instance of `Indentifier`
"""
if not synonym or not isinstance(synonym, str):
raise ValueError('`synonyms` must be a non-empty string')
super(SynonymSet, self).add(synonym)
def update(self, synonyms):
""" Add a set of synonyms
Args:
synonyms (iterable of :obj:`SynonymSet`): synonyms
"""
for synonym in synonyms:
self.add(synonym)
def symmetric_difference_update(self, other):
""" Remove common synonyms with other and add synonyms from other not in self
Args:
other (:obj:`SynonymSet`): other set of synonyms
"""
if not isinstance(other, SynonymSet):
other = SynonymSet(other)
super(SynonymSet, self).symmetric_difference_update(other)
class Monomer(object):
""" A monomeric form in a biopolymer
Attributes:
id (:obj:`str`): id
name (:obj:`str`): name
synonyms (:obj:`set` of :obj:`str`): synonyms
identifiers (:obj:`set` of :obj:`Identifier`, optional): identifiers in namespaces for external databases
structure (:obj:`openbabel.OBMol`): chemical structure
delta_mass (:obj:`float`): additional mass (Dalton) relative to structure
delta_charge (:obj:`int`): additional charge relative to structure
start_position (:obj:`tuple`): uncertainty in the location of the monomeric form
end_position (:obj:`tuple`): uncertainty in the location of the monomeric form
monomers_position (:obj:`set` of :obj:`Monomer`): originating monomers within :obj:`start_position` to
:obj:`end_position` where the monomeric form may be located
base_monomers (:obj:`set` of :obj:`Monomer`): monomers which this monomeric form is derived from
backbone_bond_atoms (:obj:`AtomList`): atoms from monomeric form that bond to backbone
backbone_displaced_atoms (:obj:`AtomList`): atoms from monomeric form displaced by bond to backbone
r_bond_atoms (:obj:`AtomList`): atoms that bond with right/suceeding/following/forward monomeric form
l_bond_atoms (:obj:`AtomList`): atoms that bond with left/preceding/previous/backward monomeric form
r_displaced_atoms (:obj:`AtomList`): atoms displaced by bond with right/suceeding/following/forward monomeric form
l_displaced_atoms (:obj:`AtomList`): atoms displaced by bond with left/preceding/previous/backward monomeric form
comments (:obj:`str`): comments
"""
def __init__(self, id=None, name=None, synonyms=None, identifiers=None, structure=None,
delta_mass=None, delta_charge=None,
start_position=None, end_position=None, monomers_position=None,
base_monomers=None,
backbone_bond_atoms=None, backbone_displaced_atoms=None,
r_bond_atoms=None, l_bond_atoms=None,
r_displaced_atoms=None, l_displaced_atoms=None,
comments=None):
"""
Attributes:
id (:obj:`str`, optional): id
name (:obj:`str`, optional): name
synonyms (:obj:`set` of :obj:`str`, optional): synonyms
identifiers (:obj:`set` of :obj:`Identifier`, optional): identifiers in namespaces for external databases
structure (:obj:`openbabel.OBMol` or :obj:`str`, optional): chemical structure
delta_mass (:obj:`float`, optional): additional mass (Dalton) relative to structure
delta_charge (:obj:`float`, optional): additional charge relative to structure
start_position (:obj:`int`, optional): uncertainty in the location of the monomeric form
end_position (:obj:`int`, optional): uncertainty in the location of the monomeric form
monomers_position (:obj:`set` of :obj:`Monomer`, optional): originating monomers within :obj:`start_position` to
:obj:`end_position` where the monomeric form may be located
base_monomers (:obj:`set` of :obj:`Monomer`, optional): monomers which this monomeric form is derived from
backbone_bond_atoms (:obj:`AtomList`, optional): atoms from monomeric form that bond to backbone
backbone_displaced_atoms (:obj:`AtomList`, optional): atoms from monomeric form displaced by bond to backbone
r_bond_atoms (:obj:`AtomList`, optional): atoms that bond with right/suceeding/following/forward monomeric form
l_bond_atoms (:obj:`AtomList`, optional): atoms that bond with left/preceding/previous/backward monomeric form
r_displaced_atoms (:obj:`AtomList`, optional): atoms displaced by bond with right/suceeding/following/forward monomeric form
l_displaced_atoms (:obj:`AtomList`, optional): atoms displaced by bond with left/preceding/previous/backward monomeric form
comments (:obj:`str`, optional): comments
"""
self.id = id
self.name = name
self.synonyms = synonyms or SynonymSet()
self.identifiers = identifiers or IdentifierSet()
self.structure = structure
self.delta_mass = delta_mass
self.delta_charge = delta_charge
self.start_position = start_position
self.end_position = end_position
self.monomers_position = monomers_position or set()
self.base_monomers = base_monomers or set()
self.backbone_bond_atoms = backbone_bond_atoms or AtomList()
self.backbone_displaced_atoms = backbone_displaced_atoms or AtomList()
self.r_bond_atoms = r_bond_atoms or AtomList()
self.l_bond_atoms = l_bond_atoms or AtomList()
self.r_displaced_atoms = r_displaced_atoms or AtomList()
self.l_displaced_atoms = l_displaced_atoms or AtomList()
self.comments = comments
@property
def id(self):
""" Get id
Returns:
:obj:`str`: id
"""
return self._id
@id.setter
def id(self, value):
""" Set id
Args:
value (:obj:`str`): id
Raises:
:obj:`ValueError`: if `value` is not a string or None
"""
if value and not isinstance(value, str):
raise ValueError('`id` must be a string or None')
self._id = value
@property
def name(self):
""" Get name
Returns:
:obj:`str`: name
"""
return self._name
@name.setter
def name(self, value):
""" Set name
Args:
value (:obj:`str`): name
Raises:
:obj:`ValueError`: if `value` is not a string or None
"""
if value and not isinstance(value, str):
raise ValueError('`name` must be a string or None')
self._name = value
@property
def synonyms(self):
""" Get synonyms
Returns:
:obj:`SynonymSet`: synonyms
"""
return self._synonyms
@synonyms.setter
def synonyms(self, value):
""" Set synonyms
Args:
value (:obj:`SynonymSet`): synonyms
Raises:
:obj:`ValueError`: if `synonyms` is not an instance of `SynonymSet`
"""
if value is None:
raise ValueError('`synonyms` must be an instance `SynonymSet`')
if not isinstance(value, SynonymSet):
value = SynonymSet(value)
self._synonyms = value
@property
def identifiers(self):
""" Get identifiers
Returns:
:obj:`IdentifierSet`: identifiers
"""
return self._identifiers
@identifiers.setter
def identifiers(self, value):
""" Set identifiers
Args:
value (:obj:`IdentifierSet`): identifiers
Raises:
:obj:`ValueError`: if `identifiers` is not an instance of `Indentifiers`
"""
if value is None:
raise ValueError('`identifiers` must be an instance `Indentifiers`')
if not isinstance(value, IdentifierSet):
value = IdentifierSet(value)
self._identifiers = value
@property
def structure(self):
""" Get structure
Returns:
:obj:`openbabel.OBMol`: structure
"""
return self._structure
@structure.setter
def structure(self, value):
""" Set structure
Args:
value (:obj:`openbabel.OBMol` or :obj:`str`): Open Babel molecule, canonical SMILES-encoded structure, or None
Raises:
:obj:`ValueError`: if value is not an Open Babel molecule, canonical SMILES-encoded structure, or None
"""
if value and not isinstance(value, openbabel.OBMol):
ob_mol = openbabel.OBMol()
conversion = openbabel.OBConversion()
assert conversion.SetInFormat('smi'), 'Unable to set format to SMILES'
if not conversion.ReadString(ob_mol, value):
raise ValueError('`structure` must be an Open Babel molecule, canonical SMILES-encoded structure, or None')
value = ob_mol
self._structure = value or None
@property
def delta_mass(self):
""" Get extra mass
Returns:
:obj:`float`: extra mass
"""
return self._delta_mass
@delta_mass.setter
def delta_mass(self, value):
""" Set extra mass
Args:
value (:obj:`float`): extra mass
Raises:
:obj:`ValueError`: if value is not a float or None
"""
if value is not None:
if not isinstance(value, (int, float)):
raise ValueError('`delta_mass` must be a float or None')
value = float(value)
self._delta_mass = value
@property
def delta_charge(self):
""" Get extra charge
Returns:
:obj:`int`: extra charge
"""
return self._delta_charge
@delta_charge.setter
def delta_charge(self, value):
""" Set extra charge
Args:
value (:obj:`int`): extra charge
Raises:
:obj:`ValueError`: if value is not an int or None
"""
if value is not None:
if not isinstance(value, (int, float)):
raise ValueError('`delta_charge` must be an integer or None')
if value != int(value):
raise ValueError('`delta_charge` must be an integer or None')
value = int(value)
self._delta_charge = value
@property
def start_position(self):
""" Get start position
Returns:
:obj:`int`: start position
"""
return self._start_position
@start_position.setter
def start_position(self, value):
""" Set start position
Args:
value (:obj:`float`): start position
Raises:
:obj:`ValueError`: if value is not an int or None
"""
if value is not None:
if not isinstance(value, (int, float)):
raise ValueError('`start_position` must be a positive integer or None')
if value != int(value) or value < 1:
raise ValueError('`start_position` must be a positive integer or None')
value = int(value)
self._start_position = value
@property
def end_position(self):
""" Get end position
Returns:
:obj:`int`: end position
"""
return self._end_position
@end_position.setter
def end_position(self, value):
""" Set end position
Args:
value (:obj:`float`): end position
Raises:
:obj:`ValueError`: if value is not an int or None
"""
if value is not None:
if not isinstance(value, (int, float)):
raise ValueError('`end_position` must be a positive integer or None')
if value != int(value) or value < 1:
raise ValueError('`end_position` must be a positive integer or None')
value = int(value)
self._end_position = value
@property
def monomers_position(self):
""" Get the originating monomers within :obj:`start_position` to
:obj:`end_position` where the monomeric form may be located
Returns:
:obj:`set` of :obj:`Monomer`: originating monomers within :obj:`start_position` to
:obj:`end_position` where the monomeric form may be located
"""
return self._monomers_position
@monomers_position.setter
def monomers_position(self, value):
""" Set the originating monomers within :obj:`start_position` to
:obj:`end_position` where the monomeric form may be located
Args:
value (:obj:`set` of :obj:`Monomer`): originating monomers within :obj:`start_position` to
:obj:`end_position` where the monomeric form may be located
Raises:
:obj:`ValueError`: if value is not an instance of :obj:`set`
"""
if isinstance(value, list):
value = set(value)
if not isinstance(value, set):
raise ValueError('`monomers_position` must be an instance of `set`')
self._monomers_position = value
@property
def base_monomers(self):
""" Get base monomeric forms
Returns:
:obj:`set` of :obj:`Monomer`: base monomeric forms
"""
return self._base_monomers
@base_monomers.setter
def base_monomers(self, value):
""" Set base monomeric forms
Args:
value (:obj:`set` of :obj:`Monomer`): base monomeric forms
Raises:
:obj:`ValueError`: if value is not an instance of :obj:`set`
"""
if isinstance(value, list):
value = set(value)
if not isinstance(value, set):
raise ValueError('`base_monomers` must be an instance of `set`')
self._base_monomers = value
@property
def backbone_bond_atoms(self):
""" Get the atoms from the monomeric form that bond to backbone
Returns:
:obj:`AtomList`: atoms from the monomeric form that bond to backbone
"""
return self._backbone_bond_atoms
@backbone_bond_atoms.setter
def backbone_bond_atoms(self, value):
""" Set the atoms from the monomeric form that bond to backbone
Args:
value (:obj:`AtomList`): atoms from the monomeric form that bond to backbone
Raises:
:obj:`ValueError`: if `backbone_bond_atoms` is not an instance of `AtomList`
"""
if value is None:
raise ValueError('`backbone_bond_atoms` must be an instance of `AtomList`')
if not isinstance(value, AtomList):
value = AtomList(value)
self._backbone_bond_atoms = value
@property
def backbone_displaced_atoms(self):
""" Get the atoms from the monomeric form displaced by the bond to the backbone
Returns:
:obj:`AtomList`: atoms from the monomeric form displaced by the bond to the backbone
"""
return self._backbone_displaced_atoms
@backbone_displaced_atoms.setter
def backbone_displaced_atoms(self, value):
""" Set the atoms from the monomeric form displaced by the bond to the backbone
Args:
value (:obj:`AtomList`): atoms from the monomeric form displaced by the bond to the backbone
Raises:
:obj:`ValueError`: if `backbone_displaced_atoms` is not an instance of `AtomList`
"""
if value is None:
raise ValueError('`backbone_displaced_atoms` must be an instance of `AtomList`')
if not isinstance(value, AtomList):
value = AtomList(value)
self._backbone_displaced_atoms = value
@property
def r_bond_atoms(self):
""" Get the left bond atoms
Returns:
:obj:`AtomList`: left bond atoms
"""
return self._r_bond_atoms
@r_bond_atoms.setter
def r_bond_atoms(self, value):
""" Set the left bond atoms
Args:
value (:obj:`AtomList`): left bond atoms
Raises:
:obj:`ValueError`: if `r_bond_atoms` is not an instance of `AtomList`
"""
if value is None:
raise ValueError('`r_bond_atoms` must be an instance of `AtomList`')
if not isinstance(value, AtomList):
value = AtomList(value)
self._r_bond_atoms = value
@property
def l_bond_atoms(self):
""" Get the right bond atoms
Returns:
:obj:`AtomList`: right bond atoms
"""
return self._l_bond_atoms
@l_bond_atoms.setter
def l_bond_atoms(self, value):
""" Set the right bond atoms
Args:
value (:obj:`AtomList`): right bond atoms
Raises:
:obj:`ValueError`: if `l_bond_atoms` is not an instance of `AtomList`
"""
if value is None:
raise ValueError('`l_bond_atoms` must be an instance of `AtomList`')
if not isinstance(value, AtomList):
value = AtomList(value)
self._l_bond_atoms = value
@property
def r_displaced_atoms(self):
""" Get the left displaced atoms
Returns:
:obj:`AtomList`: left displaced atoms
"""
return self._r_displaced_atoms
@r_displaced_atoms.setter
def r_displaced_atoms(self, value):
""" Set the left displaced atoms
Args:
value (:obj:`AtomList`): left displaced atoms
Raises:
:obj:`ValueError`: if `r_displaced_atoms` is not an instance of `AtomList`
"""
if value is None:
raise ValueError('`r_displaced_atoms` must be an instance of `AtomList`')
if not isinstance(value, AtomList):
value = AtomList(value)
self._r_displaced_atoms = value
@property
def l_displaced_atoms(self):
""" Get the right displaced atoms
Returns:
:obj:`AtomList`: right displaced atoms
"""
return self._l_displaced_atoms
@l_displaced_atoms.setter
def l_displaced_atoms(self, value):
""" Set the right displaced atoms
Args:
value (:obj:`AtomList`): right displaced atoms
Raises:
:obj:`ValueError`: if `l_displaced_atoms` is not an instance of `AtomList`
"""
if value is None:
raise ValueError('`l_displaced_atoms` must be an instance of `AtomList`')
if not isinstance(value, AtomList):
value = AtomList(value)
self._l_displaced_atoms = value
@property
def comments(self):
""" Get comments
Returns:
:obj:`str`: comments
"""
return self._comments
@comments.setter
def comments(self, value):
""" Set comments
Args:
value (:obj:`str`): comments
Raises:
:obj:`ValueError`: if value is not a str or None
"""
if value and not isinstance(value, str):
raise ValueError('`comments` must be a string or None')
self._comments = value
def get_root_monomers(self):
""" Get root monomeric forms
Returns:
:obj:`set` of :obj:`Monomer`: root monomeric forms
"""
if not self.base_monomers:
return set([self])
roots = set()
for base_monomer in self.base_monomers:
roots.update(base_monomer.get_root_monomers())
return roots
def get_major_micro_species(self, ph, major_tautomer=False, dearomatize=False):
""" Update to the major protonation and tautomerization state at the pH
Args:
ph (:obj:`float`): pH
major_tautomer (:obj:`bool`, optional): if :obj:`True`, calculate the major tautomer
dearomatize (:obj:`bool`, optional): if :obj:`True`, dearomatize molecule
"""
if self.structure:
structure = get_major_micro_species(self.export('smi', options=('c',)),
'smiles', 'smiles', ph=ph, major_tautomer=major_tautomer, dearomatize=dearomatize)
conv = openbabel.OBConversion()
assert conv.SetInFormat('smi')
assert conv.ReadString(self.structure, structure)
def export(self, format, options=()):
""" Export structure to format
Args:
format (:obj:`str`): format
options (:obj:`list` of :obj:`str`, optional): export options
Returns:
:obj:`str`: format representation of structure
"""
if self.structure:
return OpenBabelUtils.export(self.structure, format, options=options)
else:
return None
IMAGE_URL_PATTERN = ('https://cactus.nci.nih.gov/chemical/structure/{}/image'
'?format=gif'
'&bgcolor=transparent'
'&antialiasing=0')
def get_image_url(self):
""" Get URL for image of structure
Returns:
:obj:`str`: URL for image of structure
"""
smiles = self.export('smi', options=('c',))
if smiles:
return self.IMAGE_URL_PATTERN.format(urllib.parse.quote(smiles))
return None
def get_image(self, bond_label='', displaced_label='', bond_opacity=255, displaced_opacity=63,
backbone_bond_color=0xff0000, left_bond_color=0x00ff00, right_bond_color=0x0000ff,
include_all_hydrogens=True, show_atom_nums=False,
atom_label_font_size=0.6,
width=200, height=200, image_format='svg', include_xml_header=True):
""" Get image
Args:
bond_label (:obj:`str`, optional): label for atoms involved in bonds
displaced_label (:obj:`str`, optional): labels for atoms displaced by bond formation
bond_opacity (:obj:`int`, optional): opacity of atoms involved in bonds
displaced_opacity (:obj:`int`, optional): opacity of atoms dislaced by bond formation
backbone_bond_color (:obj:`int`, optional): color to paint atoms involved in bond with backbone
left_bond_color (:obj:`int`, optional): color to paint atoms involved in bond with monomeric form to left
right_bond_color (:obj:`int`, optional): color to paint atoms involved in bond with monomeric form to right
include_all_hydrogens (:obj:`bool`, optional): if :obj:`True`, show all hydrogens
show_atom_nums (:obj:`bool`, optional): if :obj:`True`, show the numbers of the atoms
atom_label_font_size (:obj:`float`, optional): relative atom label font size
width (:obj:`int`, optional): width in pixels
height (:obj:`int`, optional): height in pixels
image_format (:obj:`str`, optional): format of generated image {emf, eps, jpeg, msbmp, pdf, png, or svg}
include_xml_header (:obj:`bool`, optional): if :obj:`True`, include XML header at the beginning of the SVG
Returns:
:obj:`object`: image
"""
if not self.structure:
return None
atom_md_types = [
(self.backbone_bond_atoms, bond_label, self._blend_color_opacity(backbone_bond_color, bond_opacity)),
(self.backbone_displaced_atoms, displaced_label, self._blend_color_opacity(backbone_bond_color, displaced_opacity)),
(self.r_bond_atoms, bond_label, self._blend_color_opacity(left_bond_color, bond_opacity)),
(self.r_displaced_atoms, displaced_label, self._blend_color_opacity(left_bond_color, displaced_opacity)),
(self.l_bond_atoms, bond_label, self._blend_color_opacity(right_bond_color, bond_opacity)),
(self.l_displaced_atoms, displaced_label, self._blend_color_opacity(right_bond_color, displaced_opacity)),
]
atom_labels = []
atom_sets = {}
mol = openbabel.OBMol()
mol += self.structure
if include_all_hydrogens:
mol.AddHydrogens()
for atom_mds, label, color in atom_md_types:
bonding_hydrogens = []
for atom_md in atom_mds:
atom = mol.GetAtom(atom_md.position)
if atom_md.element == 'H' and atom.GetAtomicNum() != 1:
atom = get_hydrogen_atom(atom, bonding_hydrogens, 0)
if atom:
atom_labels.append({'position': atom.GetIdx(), 'element': atom_md.element, 'label': label, 'color': color})
if color not in atom_sets:
atom_sets[color] = {'positions': [], 'elements': [], 'color': color}
atom_sets[color]['positions'].append(atom.GetIdx())
atom_sets[color]['elements'].append(atom_md.element)
cml = OpenBabelUtils.export(mol, 'cml')
return draw_molecule(cml, 'cml', image_format=image_format,
atom_labels=atom_labels, atom_sets=atom_sets.values(),
show_atom_nums=show_atom_nums,
atom_label_font_size=atom_label_font_size,
width=width, height=height,
include_xml_header=include_xml_header)
@staticmethod
def _blend_color_opacity(color, opacity):
""" Blend color with white to simulate opacity
Args:
color (:obj:`int`): color (0-0xffffff)
opacity (:obj:`int`): opacity (0-0xff)
Returns:
:obj:`int`: blended color
"""
r = color >> 16
g = (color - r * 2**16) >> 8
b = color - r * 2**16 - g * 2**8
w = 0xff
r_opaque = round((r * opacity + w * (255 - opacity)) / 255)
g_opaque = round((g * opacity + w * (255 - opacity)) / 255)
b_opaque = round((b * opacity + w * (255 - opacity)) / 255)
return (r_opaque << 16) + (g_opaque << 8) + b_opaque
def get_formula(self):
""" Get the chemical formula
Returns:
:obj:`EmpiricalFormula`: chemical formula
"""
if not self.structure:
raise ValueError('A structure must be defined to calculate the formula')
return OpenBabelUtils.get_formula(self.structure)
def get_mol_wt(self):
""" Get the molecular weight
Returns:
:obj:`float`: molecular weight
"""
if self.structure:
return self.get_formula().get_molecular_weight() + (self.delta_mass or 0.)
return None
def get_charge(self):
""" Get the charge
Returns:
:obj:`int`: charge
"""
if not self.structure:
raise ValueError('A structure must be defined to calculate the charge')
return self.structure.GetTotalCharge() + (self.delta_charge or 0)
def to_dict(self, alphabet=None):
""" Get a dictionary representation of the monomeric form
Args:
alphabet (:obj:`Alphabet`, optional): alphabet
Returns:
:obj:`dict`: dictionary representation of the monomeric form
"""
dict = {}
attrs = ['id', 'name', 'delta_mass', 'delta_charge', 'start_position', 'end_position', 'comments']
for attr in attrs:
val = getattr(self, attr)
if val is not None:
dict[attr] = val
if self.synonyms:
dict['synonyms'] = list(self.synonyms)
if self.identifiers:
dict['identifiers'] = [{'ns': i.ns, 'id': i.id} for i in self.identifiers]
if self.structure:
dict['structure'] = self.export('smi', options=('c',))
if self.monomers_position and alphabet:
dict['monomers_position'] = []
for monomer in self.monomers_position:
monomer_code = alphabet.get_monomer_code(monomer)
dict['monomers_position'].append(monomer_code)
if self.base_monomers and alphabet:
dict['base_monomers'] = []
for monomer in self.base_monomers:
monomer_code = alphabet.get_monomer_code(monomer)
dict['base_monomers'].append(monomer_code)
attr_names = (
'backbone_bond_atoms', 'backbone_displaced_atoms',
'r_bond_atoms', 'l_bond_atoms',
'r_displaced_atoms', 'l_displaced_atoms')
for attr_name in attr_names:
atoms = getattr(self, attr_name)
if atoms:
dict[attr_name] = []
for atom in atoms:
dict[attr_name].append(atom.to_dict())
return dict
def from_dict(self, dict, alphabet=None):
""" Get a dictionary representation of the monomeric form
Args:
dict (:obj:`dict`): dictionary representation of the monomeric form
alphabet (:obj:`Alphabet`, optional): alphabet
Returns:
:obj:`Monomer`: monomeric form
"""
self.id = None
self.name = None
self.synonyms.clear()
self.identifiers.clear()
self.structure = None
self.delta_mass = None
self.delta_charge = None
self.start_position = None
self.end_position = None
self.monomers_position.clear()
self.base_monomers.clear()
self.comments = None
attrs = ['id', 'name', 'delta_mass', 'delta_charge', 'start_position', 'end_position', 'comments']
for attr in attrs:
val = dict.get(attr, None)
if val is not None:
setattr(self, attr, val)
synonyms = dict.get('synonyms', [])
if synonyms:
self.synonyms = SynonymSet(synonyms)
identifiers = dict.get('identifiers', [])
if identifiers:
self.identifiers = IdentifierSet([Identifier(i['ns'], i['id']) for i in identifiers])
structure = dict.get('structure', None)
if structure:
self.structure = structure
monomers_position_ids = dict.get('monomers_position', [])
if monomers_position_ids and alphabet:
self.monomers_position = set([alphabet.monomers.get(monomer_id) for monomer_id in monomers_position_ids])
base_monomer_ids = dict.get('base_monomers', [])
if base_monomer_ids and alphabet:
self.base_monomers = set([alphabet.monomers.get(monomer_id) for monomer_id in base_monomer_ids])
attr_names = (
'backbone_bond_atoms', 'backbone_displaced_atoms',
'r_bond_atoms', 'l_bond_atoms',
'r_displaced_atoms', 'l_displaced_atoms')
for attr_name in attr_names:
atoms = getattr(self, attr_name)
atoms.from_list(dict.get(attr_name, []))
return self
def __str__(self, alphabet=None):
""" Get a string representation of the monomeric form
Args:
alphabet (:obj:`Alphabet`, optional): alphabet
Returns:
:obj:`str`: string representation of the monomeric form
"""
els = []
if self.id:
els.append('id: "' + self.id + '"')
if self.name:
els.append('name: "' + self.name.replace('"', '\\"') + '"')
for synonym in self.synonyms:
els.append('synonym: "' + synonym.replace('"', '\\"') + '"')
for identifier in self.identifiers:
els.append('identifier: "' + identifier.id + '" @ "' + identifier.ns + '"')
if self.structure:
els.append('structure: "' + self.export('smi', options=('c',)) + '"')
atom_types = [
'backbone_bond_atoms', 'backbone_displaced_atoms',
'r_bond_atoms', 'r_displaced_atoms',
'l_bond_atoms', 'l_displaced_atoms',
]
for atom_type in atom_types:
for atom in getattr(self, atom_type):
if atom.charge > 0:
charge = '+' + str(atom.charge)
elif atom.charge == 0:
charge = ''
else:
charge = str(atom.charge)
els.append('{}: {}{}{}'.format(
atom_type[:-1].replace('_', '-'), atom.element, atom.position, charge))
if self.delta_mass is not None:
els.append('delta-mass: ' + str(self.delta_mass))
if self.delta_charge is not None:
els.append('delta-charge: ' + str(self.delta_charge))
if self.start_position is not None or self.end_position is not None:
el = 'position: {}-{}'.format(self.start_position or '', self.end_position or '')
if self.monomers_position and alphabet:
codes = sorted(alphabet.get_monomer_code(monomer) for monomer in self.monomers_position)
el += ' [{}]'.format(' | '.join(codes))
els.append(el)
if alphabet:
for monomer in self.base_monomers:
els.append('base-monomer: "{}"'.format(alphabet.get_monomer_code(monomer)))
if self.comments:
els.append('comments: "' + self.comments.replace('"', '\\"') + '"')
return '[' + ' | '.join(els) + ']'
def get_canonical_code(self, monomer_codes, default_code='?'):
""" Get IUPAC/IUBMB representation of a monomeric form using the character code
of its parent monomer (e.g. 'methyl-2-adenosine' is represented by 'A')
Args:
monomer_codes (:obj:`dict`): dictionary that maps monomeric forms to codes
default_code (:obj:`str`): default code
Returns:
:obj:`str`: IUPAC/IUBMB representation of monomeric form
"""
roots = self.get_root_monomers()
root_codes = list(set(monomer_codes.get(root, default_code) for root in roots))
if len(root_codes) == 1 and len(root_codes[0]) == 1:
return root_codes[0]
else:
return default_code
def is_equal(self, other):
""" Check if two monomeric forms are semantically equal
Args:
other (:obj:`Monomer`): another monomeric form
Returns:
:obj:`bool`: :obj:`True`, if the objects have the same structure
"""
if self is other:
return True
if self.__class__ != other.__class__:
return False
attrs = ['id', 'name', 'synonyms', 'identifiers',
'delta_mass', 'delta_charge', 'start_position', 'end_position',
'comments']
for attr in attrs:
if getattr(self, attr) != getattr(other, attr):
return False
if self.export('inchi') != other.export('inchi'):
return False
for attr in ['monomers_position', 'base_monomers']:
if len(getattr(self, attr)) != len(getattr(other, attr)):
return False
for base_monomer in getattr(self, attr):
has_equal = False
for other_base_monomer in getattr(other, attr):
if base_monomer.is_equal(other_base_monomer):
has_equal = True
break
if not has_equal:
return False
attr_names = (
'backbone_bond_atoms', 'backbone_displaced_atoms',
'r_bond_atoms', 'l_bond_atoms',
'r_displaced_atoms', 'l_displaced_atoms')
for attr_name in attr_names:
self_atoms = getattr(self, attr_name)
other_atoms = getattr(other, attr_name)
if not self_atoms.is_equal(other_atoms):
return False
return True
class MonomerSequence(list):
""" Sequence of monomeric forms """
def __init__(self, monomers=None):
"""
Args:
monomers (:obj:iterable of :obj:`Monomer`): iterable of monomeric forms
"""
super(MonomerSequence, self).__init__()
if monomers is not None:
for monomer in monomers:
self.append(monomer)
def append(self, monomer):
""" Add a monomeric form
Args:
monomer (:obj:`Monomer`): monomeric form
Raises:
:obj:`ValueError`: if the `monomer` is not an instance of `Monomer`
"""
if not isinstance(monomer, Monomer):
raise ValueError('`monomer` must be an instance of `Monomer`')
super(MonomerSequence, self).append(monomer)
def extend(self, monomers):
""" Add a list of monomeric forms
Args:
monomers (iterable of :obj:`Monomer`): iterable of monomeric forms
"""
for monomer in monomers:
self.append(monomer)
def insert(self, i, monomer):
""" Insert a monomeric form at a position
Args:
i (:obj:`int`): position to insert monomeric form
monomer (:obj:`Monomer`): monomeric form
"""
if not isinstance(monomer, Monomer):
raise ValueError('`monomer` must be an instance of `Monomer`')
super(MonomerSequence, self).insert(i, monomer)
def __setitem__(self, slice, monomer):
""" Set monomeric form(s) at slice
Args:
slice (:obj:`int` or :obj:`slice`): position(s) to set monomeric form
monomer (:obj:`Monomer` or :obj:`list` of :obj:`Monomer`): monomeric form(s)
"""
if isinstance(slice, int):
if not isinstance(monomer, Monomer):
raise ValueError('`monomer` must be a `Monomer`')
else:
for b in monomer:
if not isinstance(b, Monomer):
raise ValueError('`monomer` must be an iterable of `Monomer`')
super(MonomerSequence, self).__setitem__(slice, monomer)
def get_monomer_counts(self):
""" Get the frequency of each monomeric form within the sequence
Returns:
:obj:`dict`: dictionary that maps monomeric forms to their counts
"""
counts = {}
for monomer in self:
if monomer in counts:
counts[monomer] += 1
else:
counts[monomer] = 1
return counts
def is_equal(self, other):
""" Determine if two sequences of monomeric forms are semantically equal
Args:
other (:obj:`MonomerSequence`): other sequence
Returns:
:obj:`bool`: True, of the sequences are semantically equal
"""
if self is other:
return True
if self.__class__ != other.__class__ or len(self) != len(other):
return False
for self_monomer, other_monomer in zip(self, other):
if not self_monomer.is_equal(other_monomer):
return False
return True
class MonomerDict(attrdict.AttrDict):
""" Dictionary for monomeric forms """
def __setitem__(self, code, monomer):
""" Set monomeric form with code
Args:
code (:obj:`str`): characters for monomeric form
monomer (:obj:`Monomer`): monomeric form
"""
pattern = (r'^([^\[\]\{\}":\| \t\f\r\n]|'
r'[^\[\]\{\}":\| \t\f\r\n][^\[\]\{\}":\|\t\f\r\n]*[^\[\]\{\}":\| \t\f\r\n])$')
if not re.match(pattern, code):
raise ValueError(f'`code` "{code}" must be at least one character, excluding '
'square brackets, curly brackets, double quotes, colons, and pipes '
'and must not begin or end with a white space')
super(MonomerDict, self).__setitem__(code, monomer)
class Alphabet(object):
""" Alphabet for monomeric forms
Attributes:
id (:obj:`str`): id
name (:obj:`str`): name
description (:obj:`str`): description
monomers (:obj:`dict`): monomeric forms
"""
def __init__(self, id=None, name=None, description=None, monomers=None):
"""
Args:
id (:obj:`str`, optional): id
name (:obj:`str`, optional): name
description (:obj:`str`, optional): description
monomers (:obj:`dict`, optional): monomeric forms
"""
self.id = id
self.name = name
self.description = description
self.monomers = monomers or MonomerDict()
@property
def monomers(self):
""" Get the monomeric forms
Returns:
:obj:`MonomerDict`: monomeric forms
"""
return self._monomers
@monomers.setter
def monomers(self, value):
""" Set the monomeric forms
Args:
value (:obj:`MonomerDict`): monomeric forms
Raises:
:obj:`ValueError`: if `monomers` is not an instance of `MonomerDict`
"""
if value is None:
raise ValueError('`monomers` must be an instance of `MonomerDict`')
if not isinstance(value, MonomerDict):
value = MonomerDict(value)
self._monomers = value
def get_monomer_code(self, monomer):
""" Get the code for a monomeric form in the alphabet
Args:
monomer (:obj:`Monomer`): monomeric form
Returns:
:obj:`str`: code for monomeric form
Raises:
:obj:`ValueError`: if monomeric form is not in alphabet
"""
for code, alph_monomer in self.monomers.items():
if monomer == alph_monomer:
return code
raise ValueError('Monomer {} is not in alphabet'.format(monomer.id))
def get_major_micro_species(self, ph, major_tautomer=False, dearomatize=False):
""" Calculate the major protonation and tautomerization of each monomeric form
Args:
ph (:obj:`float`): pH
major_tautomer (:obj:`bool`, optional): if :obj:`True`, calculate the major tautomer
dearomatize (:obj:`bool`, optional): if :obj:`True`, dearomatize molecule
"""
monomers = list(filter(lambda monomer: monomer.structure is not None, self.monomers.values()))
structures = []
for monomer in monomers:
structure = monomer.export('smi', options=('c',))
structures.append(structure)
new_structures = get_major_micro_species(structures, 'smiles', 'smiles',
ph=ph, major_tautomer=major_tautomer, dearomatize=dearomatize)
for monomer, new_structure in zip(monomers, new_structures):
conv = openbabel.OBConversion()
assert conv.SetInFormat('smi')
assert conv.ReadString(monomer.structure, new_structure)
def is_equal(self, other):
""" Determine two alphabets are semantically equal
Args:
other (:obj:`type`): other alphabet
Returns:
:obj:`bool`: True, if the alphabets are semantically equal
"""
if self is other:
return True
if self.__class__ != other.__class__:
return False
for attr in ['id', 'name', 'description']:
if getattr(self, attr) != getattr(other, attr):
return False
if len(self.monomers) != len(other.monomers):
return False
for code, self_monomer in self.monomers.items():
if not self_monomer.is_equal(other.monomers.get(code, None)):
return False
return True
def to_dict(self):
""" Get dictionary representation of alphabet
Returns:
:obj:`dict`: dictionary representation of alphabet
"""
dict = {}
for attr in ['id', 'name', 'description']:
val = getattr(self, attr)
if val:
dict[attr] = val
dict['monomers'] = {}
for code, monomer in self.monomers.items():
dict['monomers'][code] = monomer.to_dict(alphabet=self)
return dict
def from_dict(self, dict):
""" Create alphabet from a dictionary representation
Args:
dict (:obj:`dict`): dictionary representation of alphabet
Returns:
:obj:`Alphabet`: alphabet
"""
for attr in ['id', 'name', 'description']:
val = dict.get(attr, None)
setattr(self, attr, val)
self.monomers.clear()
for code, monomer in dict['monomers'].items():
self.monomers[code] = Monomer().from_dict(monomer)
for code, monomer in dict['monomers'].items():
self.monomers[code].from_dict(monomer, alphabet=self)
return self
def to_yaml(self, path):
""" Save alphabet to YAML file
Args:
path (:obj:`str`): path to save alphabet in YAML format
"""
yaml_writer = yaml.YAML()
yaml_writer.default_flow_style = False
with open(path, 'wb') as file:
yaml_writer.dump(self.to_dict(), file)
def from_yaml(self, path):
""" Read alphabet from YAML file
Args:
path (:obj:`str`): path to YAML file which defines alphabet
Returns:
:obj:`Alphabet`: alphabet
"""
self.from_dict(parse_yaml(path))
return self
@cache.memoize(typed=False, expire=30 * 24 * 60 * 60, filename_args=[0])
def parse_yaml(path):
""" Read a YAML file
Args:
path (:obj:`str`): path to YAML file
Returns:
:obj:`object`: content of file
"""
yaml_reader = yaml.YAML()
with open(path, 'rb') as file:
return yaml_reader.load(file)
class AlphabetBuilder(abc.ABC):
""" Builder for alphabets
Attributes:
_max_monomers (:obj:`float`): maximum number of monomeric forms to build; used to limit length of tests
"""
def __init__(self, _max_monomers=float('inf')):
"""
Args:
_max_monomers (:obj:`float`, optional): maximum number of monomeric forms to build; used to limit length of tests
"""
self._max_monomers = _max_monomers
def run(self, ph=None, major_tautomer=False, dearomatize=False, path=None):
""" Build alphabet and, optionally, save to YAML file
Args:
ph (:obj:`float`, optional): pH at which to calculate the major protonation state of each monomeric form
major_tautomer (:obj:`bool`, optional): if :obj:`True`, calculate the major tautomer
dearomatize (:obj:`bool`, optional): if :obj:`True`, dearomatize molecule
path (:obj:`str`, optional): path to save alphabet
Returns:
:obj:`Alphabet`: alphabet
"""
alphabet = self.build(ph=ph, major_tautomer=major_tautomer, dearomatize=dearomatize)
if path:
self.save(alphabet, path)
return alphabet
@abc.abstractmethod
def build(self, ph=None, major_tautomer=False, dearomatize=False):
""" Build alphabet
Args:
ph (:obj:`float`, optional): pH at which to calculate the major protonation state of each monomeric form
major_tautomer (:obj:`bool`, optional): if :obj:`True`, calculate the major tautomer
dearomatize (:obj:`bool`, optional): if :obj:`True`, dearomatize molecule
Returns:
:obj:`Alphabet`: alphabet
"""
pass # pragma: no cover
def save(self, alphabet, path):
""" Save alphabet to YAML file
Args:
alphabet (:obj:`Alphabet`): alphabet
path (:obj:`str`): path to save alphabet
"""
alphabet.to_yaml(path)
class Atom(object):
""" An atom in a compound or bond
Attributes:
molecule (:obj:`type`): type of parent molecule
element (:obj:`str`): code for the element (e.g. 'H')
position (:obj:`int`): position of the atom within the molecule, which should use canonical SMILES atom numbers
charge (:obj:`int`): charge of the atom
monomer (:obj:`int`): index of parent monomeric form within sequence
"""
def __init__(self, molecule, element, position=None, charge=0, monomer=None):
"""
Args:
molecule (:obj:`type`): type of parent molecule
element (:obj:`str`, optional): code for the element (e.g. 'H')
position (:obj:`int`, optional): position of the atom within the molecule, which should use
canonical SMILES atom numbers
charge (:obj:`int`, optional): charge of the atom
monomer (:obj:`int`, optional): index of parent monomeric form within sequence
"""
self.molecule = molecule
self.element = element
self.position = position
self.charge = charge
self.monomer = monomer
@property
def molecule(self):
""" Get type of parent molecule
Returns:
:obj:`type`: type of parent molecule
"""
return self._molecule
@molecule.setter
def molecule(self, value):
""" Set the type of parent molecule
Args:
value (:obj:`type`): type of parent molecule
Raises:
:obj:`ValueError`: if `molecule` is not :obj:`None`, :obj:`Monomer`, or :obj:`Backbone`
"""
if value not in [None, Monomer, Backbone]:
raise ValueError('`molecule` must be `None`, `Monomer`, or `Backbone`')
self._molecule = value
@property
def element(self):
""" Get the element
Returns:
:obj:`str`: element
"""
return self._element
@element.setter
def element(self, value):
""" Set the element
Args:
value (:obj:`str`): element
"""
if not isinstance(value, str):
raise ValueError('`element` must be a string')
self._element = value
@property
def position(self):
""" Get the position
Returns:
:obj:`int`: position
"""
return self._position
@position.setter
def position(self, value):
""" Set the position
Args:
value (:obj:`int`): position
"""
if value is not None:
if (not isinstance(value, (int, float)) or value != int(value) or value < 1):
raise ValueError('`position` must be a positive integer or None')
value = int(value)
self._position = value
@property
def charge(self):
""" Get the charge
Returns:
:obj:`str`: charge
"""
return self._charge
@charge.setter
def charge(self, value):
""" Set the charge
Args:
value (:obj:`str`): charge
"""
if not isinstance(value, (float, int)) or int(value) != value:
raise ValueError('`charge` must be an integer')
value = int(value)
self._charge = value
@property
def monomer(self):
""" Get the index of the parent monomer within the sequence
Returns:
:obj:`int`: index of the parent monomer within the sequence
"""
return self._monomer
@monomer.setter
def monomer(self, value):
""" Set the index of the parent monomer within the sequence
Args:
value (:obj:`int`): index of the parent monomer within the sequence
"""
if value is not None:
if (not isinstance(value, (int, float)) or value != int(value) or value < 1):
raise ValueError('`monomer` must be a positive integer or None')
value = int(value)
self._monomer = value
def is_equal(self, other):
""" Determine if two atoms are semantically equal
Args:
other (:obj:`Atom`): other atom
Returns:
:obj:`bool`: obj:`True` if the atoms are semantically equal
"""
if self is other:
return True
if self.__class__ != other.__class__:
return False
return self is other or (self.__class__ == other.__class__
and self.molecule == other.molecule
and self.element == other.element
and self.position == other.position
and self.charge == other.charge
and self.monomer == other.monomer)
def to_dict(self):
""" Get dictionary representation
Returns:
:obj:`dict`: dictionary representation
"""
dict = {}
if self.molecule:
dict['molecule'] = self.molecule.__name__
dict['element'] = self.element
if self.position is not None:
dict['position'] = self.position
if self.charge:
dict['charge'] = self.charge
if self.monomer is not None:
dict['monomer'] = self.monomer
return dict
def from_dict(self, dict):
""" Load from dictionary representation
Args:
dict (:obj:`dict`): dictionary representation
Returns:
:obj:`Atom`: atom
"""
molecule = dict.get('molecule', None)
if molecule == 'Monomer':
self.molecule = Monomer
elif molecule == 'Backbone':
self.molecule = Backbone
else:
self.molecule = None
self.element = dict['element']
self.position = dict.get('position', None)
self.charge = dict.get('charge', 0)
self.monomer = dict.get('monomer', None)
return self
class AtomList(list):
""" List of atoms """
def __init__(self, atoms=None):
"""
Args:
atoms (:obj:iterable of :obj:`Atom`): iterable of atoms
"""
super(AtomList, self).__init__()
if atoms is not None:
for atom in atoms:
self.append(atom)
def append(self, atom):
""" Add a atom
Args:
atom (:obj:`Atom`): atom
Raises:
:obj:`ValueError`: if the `atom` is not an instance of `Atom`
"""
if not isinstance(atom, Atom):
raise ValueError('`atom` must be an instance of `Atom`')
super(AtomList, self).append(atom)
def extend(self, atoms):
""" Add a list of atoms
Args:
atoms (iterable of :obj:`Atom`): iterable of atoms
"""
for atom in atoms:
self.append(atom)
def insert(self, i, atom):
""" Insert an atom at a position
Args:
i (:obj:`int`): position to insert atom
atom (:obj:`Atom`): atom
"""
if not isinstance(atom, Atom):
raise ValueError('`atom` must be an instance of `Atom`')
super(AtomList, self).insert(i, atom)
def __setitem__(self, slice, atom):
""" Set atom(s) at slice
Args:
slice (:obj:`int` or :obj:`slice`): position(s) to set atom
atom (:obj:`Atom` or :obj:`AtomList`): atom or atoms
"""
if isinstance(slice, int):
if not isinstance(atom, Atom):
raise ValueError('`atom` must be a `Atom`')
else:
for b in atom:
if not isinstance(b, Atom):
raise ValueError('`atom` must be an iterable of `Atom`')
super(AtomList, self).__setitem__(slice, atom)
def is_equal(self, other):
""" Determine if two lists of atoms are semantically equal
Args:
other (:obj:`AtomList`): other list of atoms
Returns:
:obj:`bool`: True, of the lists of atoms are semantically equal
"""
if self is other:
return True
if self.__class__ != other.__class__ or len(self) != len(other):
return False
for self_atom, other_atom in zip(self, other):
if not self_atom.is_equal(other_atom):
return False
return True
def to_list(self):
""" Get list representation
Returns:
:obj:`list`: list representation
"""
list = []
for atom in self:
list.append(atom.to_dict())
return list
def from_list(self, list):
""" Load from list representation
Args:
list (:obj:`list`): list representation
Returns:
:obj:`AtomList`: atom list
"""
self.clear()
for atom in list:
self.append(Atom(None, '').from_dict(atom))
return self
class Backbone(object):
""" Backbone of a monomeric form
Attributes:
structure (:obj:`openbabel.OBMol`): chemical structure
monomer_bond_atoms (:obj:`AtomList`): atoms from backbone that bond to monomeric form
monomer_displaced_atoms (:obj:`AtomList`): atoms from backbone displaced by bond to monomeric form
"""
def __init__(self, structure=None, monomer_bond_atoms=None, monomer_displaced_atoms=None):
"""
Args:
structure (:obj:`str` or :obj:`openbabel.OBMol`, optional): chemical structure as canonical
SMILES-encoded string or Open Babel molecule
monomer_bond_atoms (:obj:`AtomList`, optional): atoms from backbone that bond to monomeric form
monomer_displaced_atoms (:obj:`AtomList`, optional): atoms from backbone displaced by bond to monomeric form
"""
self.structure = structure
self.monomer_bond_atoms = monomer_bond_atoms or AtomList()
self.monomer_displaced_atoms = monomer_displaced_atoms or AtomList()
@property
def structure(self):
""" Get the structure
Returns:
:obj:`openbabel.OBMol`: structure
"""
return self._structure
@structure.setter
def structure(self, value):
""" Set the structure
Args:
value (:obj:`str` or :obj:`openbabel.OBMol`): structure as canonical SMILES-encoded string or Open Babel molecule
"""
if value is not None and not isinstance(value, openbabel.OBMol):
ob_mol = openbabel.OBMol()
conversion = openbabel.OBConversion()
assert conversion.SetInFormat('smi'), 'Unable to set format to SMILES'
if not conversion.ReadString(ob_mol, value):
raise ValueError('`structure` must be an Open Babel molecule, canonical SMILES-encoded structure, or None')
value = ob_mol
self._structure = value
@property
def monomer_bond_atoms(self):
""" Get the backbone bond atoms
Returns:
:obj:`AtomList`: backbone bond atoms
"""
return self._monomer_bond_atoms
@monomer_bond_atoms.setter
def monomer_bond_atoms(self, value):
""" Set the backbone bond atoms
Args:
value (:obj:`AtomList`): backbone bond atoms
Raises:
:obj:`ValueError`: if `monomer_bond_atoms` is not an instance of `AtomList`
"""
if value is None:
raise ValueError('`monomer_bond_atoms` must be an instance of `AtomList`')
if not isinstance(value, AtomList):
value = AtomList(value)
self._monomer_bond_atoms = value
@property
def monomer_displaced_atoms(self):
""" Get the backbone displaced atoms
Returns:
:obj:`AtomList`: backbone displaced atoms
"""
return self._monomer_displaced_atoms
@monomer_displaced_atoms.setter
def monomer_displaced_atoms(self, value):
""" Set the backbone displaced atoms
Args:
value (:obj:`AtomList`): backbone displaced atoms
Raises:
:obj:`ValueError`: if `monomer_displaced_atoms` is not an instance of `AtomList`
"""
if value is None:
raise ValueError('`monomer_displaced_atoms` must be an instance of `AtomList`')
if not isinstance(value, AtomList):
value = AtomList(value)
self._monomer_displaced_atoms = value
def export(self, format, options=()):
""" Export structure to format
Args:
format (:obj:`str`): format
options (:obj:`list` of :obj:`str`, optional): export options
Returns:
:obj:`str`: format representation of structure
"""
if self.structure:
return OpenBabelUtils.export(self.structure, format, options=options)
return None
def get_formula(self):
""" Get the formula
Returns:
:obj:`EmpiricalFormula`: formula
"""
if self.structure:
formula = OpenBabelUtils.get_formula(self.structure)
else:
formula = EmpiricalFormula()
for atom in self.monomer_displaced_atoms:
formula[atom.element] -= 1
return formula
def get_mol_wt(self):
""" Get the molecular weight
Returns:
:obj:`float`: molecular weight
"""
return self.get_formula().get_molecular_weight()
def get_charge(self):
""" Get the charge
Returns:
:obj:`int`: charge
"""
if self.structure:
charge = self.structure.GetTotalCharge()
else:
charge = 0
for atom in self.monomer_bond_atoms:
charge -= atom.charge
for atom in self.monomer_displaced_atoms:
charge -= atom.charge
return charge
def is_equal(self, other):
""" Determine if two backbones are semantically equal
Args:
other (:obj:`Backbone`): other backbone
Returns:
:obj:`bool`: :obj:`True` if the backbones are semantically equal
"""
if self is other:
return True
if self.__class__ != other.__class__:
return False
if self.export('inchi') != other.export('inchi'):
return False
if not self.monomer_bond_atoms.is_equal(other.monomer_bond_atoms)\
or not self.monomer_displaced_atoms.is_equal(other.monomer_displaced_atoms):
return False
return True
class BondOrder(int, enum.Enum):
""" Bond order """
single = 1
double = 2
triple = 3
aromatic = 4
class BondStereo(int, enum.Enum):
""" Bond stereochemistry """
wedge = 1
hash = 2
up = 3
down = 4
class BondBase(abc.ABC):
@abc.abstractmethod
def get_l_bond_atoms(self):
""" Get left bond atoms
Returns:
:obj:`list` of :obj:`Atom`: left bond atoms
"""
pass # pragma: no cover
@abc.abstractmethod
def get_r_bond_atoms(self):
""" Get right bond atoms
Returns:
:obj:`list` of :obj:`Atom`: left bond atoms
"""
pass # pragma: no cover
@abc.abstractmethod
def get_l_displaced_atoms(self):
""" Get left displaced atoms
Returns:
:obj:`list` of :obj:`Atom`: left bond atoms
"""
pass # pragma: no cover
@abc.abstractmethod
def get_r_displaced_atoms(self):
""" Get right displaced atoms
Returns:
:obj:`list` of :obj:`Atom`: left bond atoms
"""
pass # pragma: no cover
@abc.abstractmethod
def get_order(self):
""" Get the order
Returns:
:obj:`BondOrder`: order
"""
pass # pragma: no cover
@abc.abstractmethod
def get_stereo(self):
""" Get the stereochemistry
Returns:
:obj:`BondStereo`: stereochemistry
"""
pass # pragma: no cover
def get_formula(self, none_position=True):
""" Get the formula
Args:
none_position (:obj:`bool`, optional): include atoms whose position is :obj:`None`
Returns:
:obj:`EmpiricalFormula`: formula
"""
formula = EmpiricalFormula()
for atom in self.l_displaced_atoms:
if atom.position is not None or none_position:
formula[atom.element] -= 1
for atom in self.r_displaced_atoms:
if atom.position is not None or none_position:
formula[atom.element] -= 1
return formula
def get_mol_wt(self, none_position=True):
""" Get the molecular weight
Args:
none_position (:obj:`bool`, optional): include atoms whose position is :obj:`None`
Returns:
:obj:`float`: molecular weight
"""
return self.get_formula(none_position=none_position).get_molecular_weight()
def get_charge(self, none_position=True):
""" Get the charge
Args:
none_position (:obj:`bool`, optional): include atoms whose position is :obj:`None`
Returns:
:obj:`int`: charge
"""
charge = 0
for atom in self.l_bond_atoms:
if atom.position is not None or none_position:
charge -= atom.charge
for atom in self.l_displaced_atoms:
if atom.position is not None or none_position:
charge -= atom.charge
for atom in self.r_bond_atoms:
if atom.position is not None or none_position:
charge -= atom.charge
for atom in self.r_displaced_atoms:
if atom.position is not None or none_position:
charge -= atom.charge
return charge
class Bond(BondBase):
""" Bond between monomeric forms (inter-residue bond or crosslink)
Attributes:
id (:obj:`str`): id
name (:obj:`str`): name
synonyms (:obj:`SynonymSet`): synonyms
l_monomer (:obj:`Monomer`): left monomeric form
r_monomer (:obj:`Monomer`): right monomeric form
l_bond_atoms (:obj:`AtomList`): atoms from left monomeric form that bond with right monomeric form
r_bond_atoms (:obj:`AtomList`): atoms from right monomeric form that bond with left monomeric form
l_displaced_atoms (:obj:`AtomList`): atoms from left monomeric form displaced by bond
r_displaced_atoms (:obj:`AtomList`): atoms from right monomeric form displaced by bond
order (:obj:`BondOrder`): order
stereo (:obj:`BondStereo`): stereochemistry
comments (:obj:`str`): comments
"""
def __init__(self, id=None, name=None, synonyms=None,
l_monomer=None, r_monomer=None,
l_bond_atoms=None, r_bond_atoms=None,
l_displaced_atoms=None, r_displaced_atoms=None,
order=BondOrder.single, stereo=None,
comments=None):
"""
Args:
id (:obj:`str`, optional): id
name (:obj:`str`, optional): name
synonyms (:obj:`SynonymSet`, optional): synonyms
l_monomer (:obj:`Monomer`, optional): left monomeric form
r_monomer (:obj:`Monomer`, optional): right monomeric form
l_bond_atoms (:obj:`AtomList`, optional): atoms from left monomeric form that bond with right monomeric form
r_bond_atoms (:obj:`AtomList`, optional): atoms from right monomeric form that bond with left monomeric form
l_displaced_atoms (:obj:`AtomList`, optional): atoms from left monomeric form displaced by bond
r_displaced_atoms (:obj:`AtomList`, optional): atoms from right monomeric form displaced by bond
order (:obj:`BondOrder`, optional): order
stereo (:obj:`BondStereo`, optional): stereochemistry
comments (:obj:`str`, optional): comments
"""
self.id = id
self.name = name
self.synonyms = synonyms or SynonymSet()
self.l_monomer = l_monomer
self.r_monomer = r_monomer
self.l_bond_atoms = l_bond_atoms or AtomList()
self.r_bond_atoms = r_bond_atoms or AtomList()
self.l_displaced_atoms = l_displaced_atoms or AtomList()
self.r_displaced_atoms = r_displaced_atoms or AtomList()
self.order = order
self.stereo = stereo
self.comments = comments
@property
def id(self):
""" Get id
Returns:
:obj:`str`: id
"""
return self._id
@id.setter
def id(self, value):
""" Set id
Args:
value (:obj:`str`): id
Raises:
:obj:`ValueError`: if `value` is not a string or None
"""
if value is not None and not isinstance(value, str):
raise ValueError('`id` must be a string or None')
self._id = value
@property
def name(self):
""" Get name
Returns:
:obj:`str`: name
"""
return self._name
@name.setter
def name(self, value):
""" Set name
Args:
value (:obj:`str`): name
Raises:
:obj:`ValueError`: if `value` is not a string or None
"""
if value is not None and not isinstance(value, str):
raise ValueError('`name` must be a string or None')
self._name = value
@property
def synonyms(self):
""" Get synonyms
Returns:
:obj:`SynonymSet`: synonyms
"""
return self._synonyms
@synonyms.setter
def synonyms(self, value):
""" Set synonyms
Args:
value (:obj:`SynonymSet`): synonyms
Raises:
:obj:`ValueError`: if `synonyms` is not an instance of `SynonymSet`
"""
if value is None:
raise ValueError('`synonyms` must be an instance `SynonymSet`')
if not isinstance(value, SynonymSet):
value = SynonymSet(value)
self._synonyms = value
@property
def l_monomer(self):
""" Get the left monomeric form
Returns:
:obj:`Monomer`: left monomeric form
"""
return self._l_monomer
@l_monomer.setter
def l_monomer(self, value):
""" Set the left monomeric form
Args:
value (:obj:`Monomer`): left monomeric form
Raises:
:obj:`ValueError`: if `l_monomer` is not an instance of `Monomer` or `None`
"""
if not (isinstance(value, Monomer) or value is None):
raise ValueError('`l_monomer` must be an instance of `Monomer` or `None`')
self._l_monomer = value
@property
def r_monomer(self):
""" Get the right monomeric form
Returns:
:obj:`Monomer`: right monomeric form
"""
return self._r_monomer
@r_monomer.setter
def r_monomer(self, value):
""" Set the right monomeric form
Args:
value (:obj:`Monomer`): right monomeric form
Raises:
:obj:`ValueError`: if `r_monomer` is not an instance of `Monomer` or `None`
"""
if not (isinstance(value, Monomer) or value is None):
raise ValueError('`r_monomer` must be an instance of `Monomer` or `None`')
self._r_monomer = value
@property
def l_bond_atoms(self):
""" Get the left bond atoms
Returns:
:obj:`AtomList`: left bond atoms
"""
return self._l_bond_atoms
@l_bond_atoms.setter
def l_bond_atoms(self, value):
""" Set the left bond atoms
Args:
value (:obj:`AtomList`): left bond atoms
Raises:
:obj:`ValueError`: if `l_bond_atoms` is not an instance of `AtomList`
"""
if value is None:
raise ValueError('`l_bond_atoms` must be an instance of `AtomList`')
if not isinstance(value, AtomList):
value = AtomList(value)
self._l_bond_atoms = value
@property
def r_bond_atoms(self):
""" Get the right bond atoms
Returns:
:obj:`AtomList`: right bond atoms
"""
return self._r_bond_atoms
@r_bond_atoms.setter
def r_bond_atoms(self, value):
""" Set the right bond atoms
Args:
value (:obj:`AtomList`): right bond atoms
Raises:
:obj:`ValueError`: if `r_bond_atoms` is not an instance of `AtomList`
"""
if value is None:
raise ValueError('`r_bond_atoms` must be an instance of `AtomList`')
if not isinstance(value, AtomList):
value = AtomList(value)
self._r_bond_atoms = value
@property
def l_displaced_atoms(self):
""" Get the left displaced atoms
Returns:
:obj:`AtomList`: left displaced atoms
"""
return self._l_displaced_atoms
@l_displaced_atoms.setter
def l_displaced_atoms(self, value):
""" Set the left displaced atoms
Args:
value (:obj:`AtomList`): left displaced atoms
Raises:
:obj:`ValueError`: if `l_displaced_atoms` is not an instance of `AtomList`
"""
if value is None:
raise ValueError('`l_displaced_atoms` must be an instance of `AtomList`')
if not isinstance(value, AtomList):
value = AtomList(value)
self._l_displaced_atoms = value
@property
def r_displaced_atoms(self):
""" Get the right displaced atoms
Returns:
:obj:`AtomList`: right displaced atoms
"""
return self._r_displaced_atoms
@r_displaced_atoms.setter
def r_displaced_atoms(self, value):
""" Set the right displaced atoms
Args:
value (:obj:`AtomList`): right displaced atoms
Raises:
:obj:`ValueError`: if `r_displaced_atoms` is not an instance of `AtomList`
"""
if value is None:
raise ValueError('`r_displaced_atoms` must be an instance of `AtomList`')
if not isinstance(value, AtomList):
value = AtomList(value)
self._r_displaced_atoms = value
@property
def order(self):
""" Get the order
Returns:
:obj:`BondOrder`: order
"""
return self._order
@order.setter
def order(self, value):
""" Set the order
Args:
value (:obj:`BondOrder`): order
Raises:
:obj:`ValueError`: if `order` is not an instance of `BondOrder`
"""
if not isinstance(value, BondOrder):
raise ValueError('`order` must be an instance of `BondOrder`')
self._order = value
@property
def stereo(self):
""" Get the stereochemistry
Returns:
:obj:`BondStereo`: stereochemistry
"""
return self._stereo
@stereo.setter
def stereo(self, value):
""" Set the stereo
Args:
value (:obj:`BondStereo`): stereochemistry
Raises:
:obj:`ValueError`: if `stereo` is not an instance of `BondStereo`
"""
if value is not None and not isinstance(value, BondStereo):
raise ValueError('`stereo` must be an instance of `BondStereo` or `None`')
self._stereo = value
@property
def comments(self):
""" Get comments
Returns:
:obj:`str`: comments
"""
return self._comments
@comments.setter
def comments(self, value):
""" Set comments
Args:
value (:obj:`str`): comments
Raises:
:obj:`ValueError`: if value is not a str or None
"""
if value and not isinstance(value, str):
raise ValueError('`comments` must be a string or None')
self._comments = value
def get_l_bond_atoms(self):
""" Get left bond atoms
Returns:
:obj:`list` of :obj:`Atom`: left bond atoms
"""
return self.l_bond_atoms
def get_r_bond_atoms(self):
""" Get right bond atoms
Returns:
:obj:`list` of :obj:`Atom`: left bond atoms
"""
return self.r_bond_atoms
def get_l_displaced_atoms(self):
""" Get left displaced atoms
Returns:
:obj:`list` of :obj:`Atom`: left bond atoms
"""
return self.l_displaced_atoms
def get_r_displaced_atoms(self):
""" Get right displaced atoms
Returns:
:obj:`list` of :obj:`Atom`: left bond atoms
"""
return self.r_displaced_atoms
def get_order(self):
""" Get the order
Returns:
:obj:`BondOrder`: order
"""
return self.order
def get_stereo(self):
""" Get the stereochemistry
Returns:
:obj:`BondStereo`: stereochemistry
"""
return self.stereo
def is_equal(self, other):
""" Determine if two bonds are semantically equal
Args:
other (:obj:`Bond`): other bond
Returns:
:obj:`bool`: :obj:`True` if the bond are semantically equal
"""
if self is other:
return True
if self.__class__ != other.__class__:
return False
if (self.l_monomer is None and other.l_monomer is not None) or \
(self.l_monomer is not None and not self.l_monomer.is_equal(other.l_monomer)):
return False
if (self.r_monomer is None and other.r_monomer is not None) or \
(self.r_monomer is not None and not self.r_monomer.is_equal(other.r_monomer)):
return False
if not self.l_bond_atoms.is_equal(other.l_bond_atoms) \
or not self.r_bond_atoms.is_equal(other.r_bond_atoms) \
or not self.l_displaced_atoms.is_equal(other.l_displaced_atoms) \
or not self.r_displaced_atoms.is_equal(other.r_displaced_atoms):
return False
if self.order != other.order:
return False
if self.stereo != other.stereo:
return False
if self.comments != other.comments:
return False
return True
def __str__(self):
""" Generate string representation of bond
Returns:
:obj:`str`: string representation of bond
"""
attrs = []
for atom_type in ['l_bond_atoms', 'r_bond_atoms',
'l_displaced_atoms', 'r_displaced_atoms']:
for atom in getattr(self, atom_type):
if atom.charge > 0:
charge = '+' + str(atom.charge)
elif atom.charge < 0:
charge = str(atom.charge)
else:
charge = ''
attrs.append('{}: {}{}{}{}'.format(atom_type[0:-1].replace('_', '-'),
atom.monomer or '',
atom.element,
atom.position or '',
charge))
if self.order != BondOrder.single:
attrs.append('order: "{}"'.format(self.order.name))
if self.stereo is not None:
attrs.append('stereo: "{}"'.format(self.stereo.name))
if self.comments:
attrs.append('comments: "{}"'.format(self.comments.replace('"', '\\"')))
return "[{}]".format(' | '.join(attrs))
class OntoBond(BondBase):
""" A crosslinking bond whose molecular details are defined in an
ontology of crosslinks
Attributes:
type (:obj:`Bond`): type of bond
l_monomer (:obj:`int`): location of left monomeric form
r_monomer (:obj:`int`): location of right monomeric form
"""
def __init__(self, type=None, l_monomer=None, r_monomer=None):
self.type = type
self.l_monomer = l_monomer
self.r_monomer = r_monomer
@property
def type(self):
""" Get type
Returns:
:obj:`Bond`: type
"""
return self._type
@type.setter
def type(self, value):
""" Set type
Args:
value (:obj:`Bond`): type
Raises:
:obj:`ValueError`: if value is not a Bond or None
"""
if value is not None:
if not isinstance(value, Bond):
raise ValueError('`type` must be an instance of `Bond` or `None`')
self._type = value
@property
def l_monomer(self):
""" Get location of left monomeric form
Returns:
:obj:`int`: location of left monomeric form
"""
return self._l_monomer
@l_monomer.setter
def l_monomer(self, value):
""" Set location of left monomeric form
Args:
value (:obj:`int`): location of left monomeric form
Raises:
:obj:`ValueError`: if value is not a positive integer or None
"""
if value is not None:
if not isinstance(value, (int, float)):
raise ValueError('`l_monomer` must be a positive integer or None')
if value != int(value) or value < 1:
raise ValueError('`l_monomer` must be a positive integer or None')
value = int(value)
self._l_monomer = value
@property
def r_monomer(self):
""" Get location of right monomeric form
Returns:
:obj:`int`: location of right monomeric form
"""
return self._r_monomer
@r_monomer.setter
def r_monomer(self, value):
""" Set location of right monomeric form
Args:
value (:obj:`int`): location of right monomeric form
Raises:
:obj:`ValueError`: if value is not a positive integer or None
"""
if value is not None:
if not isinstance(value, (int, float)):
raise ValueError('`r_monomer` must be a positive integer or None')
if value != int(value) or value < 1:
raise ValueError('`r_monomer` must be a positive integer or None')
value = int(value)
self._r_monomer = value
def get_l_bond_atoms(self):
""" Get left bond atoms
Returns:
:obj:`list` of :obj:`Atom`: left bond atoms
"""
atoms = []
for atom in self.type.l_bond_atoms:
atoms.append(Atom(atom.molecule, atom.element,
position=atom.position, charge=atom.charge, monomer=self.l_monomer))
return atoms
def get_r_bond_atoms(self):
""" Get right bond atoms
Returns:
:obj:`list` of :obj:`Atom`: left bond atoms
"""
atoms = []
for atom in self.type.r_bond_atoms:
atoms.append(Atom(atom.molecule, atom.element,
position=atom.position, charge=atom.charge, monomer=self.r_monomer))
return atoms
def get_l_displaced_atoms(self):
""" Get left displaced atoms
Returns:
:obj:`list` of :obj:`Atom`: left bond atoms
"""
atoms = []
for atom in self.type.l_displaced_atoms:
atoms.append(Atom(atom.molecule, atom.element,
position=atom.position, charge=atom.charge, monomer=self.l_monomer))
return atoms
def get_r_displaced_atoms(self):
""" Get right displaced atoms
Returns:
:obj:`list` of :obj:`Atom`: left bond atoms
"""
atoms = []
for atom in self.type.r_displaced_atoms:
atoms.append(Atom(atom.molecule, atom.element,
position=atom.position, charge=atom.charge, monomer=self.l_monomer))
return atoms
def get_order(self):
""" Get the order
Returns:
:obj:`BondOrder`: order
"""
return self.type.order
def get_stereo(self):
""" Get the stereochemistry
Returns:
:obj:`BondStereo`: stereochemistry
"""
return self.type.stereo
def __str__(self):
""" Generate string representation of bond
Returns:
:obj:`str`: string representation of bond
"""
attrs = []
if self.type:
attrs.append('type: "{}"'.format(onto_crosslink_to_id[self.type]))
if self.l_monomer:
attrs.append('l: {}'.format(self.l_monomer))
if self.r_monomer:
attrs.append('r: {}'.format(self.r_monomer))
return "[{}]".format(' | '.join(attrs))
def is_equal(self, other):
""" Determine if two bonds are semantically equal
Args:
other (:obj:`Bond`): other bond
Returns:
:obj:`bool`: :obj:`True` if the bond are semantically equal
"""
if self is other:
return True
if self.__class__ != other.__class__:
return False
if (self.type is None and other.type is not None) or \
(self.type is not None and not self.type.is_equal(other.type)):
return False
if self.l_monomer != other.l_monomer or self.r_monomer != other.r_monomer:
return False
return True
class BondSet(set):
""" Set of bonds """
def add(self, bond):
""" Add a bond
Args:
bond (:obj:`BondBase`): bond
Raises:
:obj:`ValueError`: if the `bond` is not an instance of `Bond`
"""
if not isinstance(bond, BondBase):
raise ValueError('`bond` must be an instance of `BondBase`')
super(BondSet, self).add(bond)
def update(self, bonds):
""" Add a set of bonds
Args:
bonds (iterable of :obj:`BondBase`): bonds
"""
for bond in bonds:
self.add(bond)
def symmetric_difference_update(self, other):
""" Remove common elements with other and add elements from other not in self
Args:
other (:obj:`BondSet`): other set of bonds
"""
for o in other:
if o in self:
self.remove(o)
else:
self.add(o)
def is_equal(self, other):
""" Check if two sets of bonds are semantically equal
Args:
other (:obj:`BondSet`): other set of bonds
Returns:
:obj:`bool`: :obj:`True`, if the bond sets are semantically equal
"""
if self is other:
return True
if self.__class__ != other.__class__:
return False
if len(self) != len(other):
return False
o_bonds = set(other)
for s_bond in self:
match = False
for o_bond in set(o_bonds):
if s_bond.is_equal(o_bond):
match = True
o_bonds.remove(o_bond)
break
if not match:
return False
return True
class Nick(object):
""" Nick between adjacent monomeric forms
Attributes:
position (:obj:`int`): position of nick (:math:`1 \ldots L-1`) where
:math:`1` indicates a nick between the first and second residues,
:math:`2` indicates a nick between the second and third residues, etc.
"""
def __init__(self, position=None):
"""
Args:
position (:obj:`int`, optional): position of nick
"""
self.position = position
@property
def position(self):
""" Get the position
Returns:
:obj:`int`: position
"""
return self._position
@position.setter
def position(self, value):
""" Set the position
Args:
value (:obj:`int`): position
"""
if value is not None:
if (not isinstance(value, (int, float)) or value != int(value) or value < 1):
raise ValueError('`position` must be a positive integer or None')
value = int(value)
self._position = value
def is_equal(self, other):
return self is other or (
self.__class__ is other.__class__ and
self.position == other.position)
class NickSet(set):
""" Set of nicks """
def add(self, nick):
""" Add a nick
Args:
nick (:obj:`Nick`): nick
Raises:
:obj:`ValueError`: if the `nick` is not an instance of `Nick`
"""
if not isinstance(nick, Nick):
raise ValueError('`nick` must be an instance of `Nick`')
super(NickSet, self).add(nick)
def update(self, nicks):
""" Add a set of nicks
Args:
nicks (iterable of :obj:`Nick`): nicks
"""
for nick in nicks:
self.add(nick)
def symmetric_difference_update(self, other):
""" Remove common elements with other and add elements from other not in self
Args:
other (:obj:`NickSet`): other set of nicks
"""
for o in other:
if o in self:
self.remove(o)
else:
self.add(o)
def is_equal(self, other):
""" Check if two sets of nicks are semantically equal
Args:
other (:obj:`NickSet`): other set of nicks
Returns:
:obj:`bool`: :obj:`True`, if the nicks sets are semantically equal
"""
if self is other:
return True
if self.__class__ != other.__class__:
return False
if len(self) != len(other):
return False
o_nicks = set(other)
for s_nick in self:
match = False
for o_nick in set(o_nicks):
if s_nick.is_equal(o_nick):
match = True
o_nicks.remove(o_nick)
break
if not match:
return False
return True
class BpForm(object):
""" Biopolymer form
Attributes:
seq (:obj:`MonomerSequence`): sequence of monomeric forms of the biopolymer
alphabet (:obj:`Alphabet`): alphabet of monomeric forms
backbone (:obj:`Backbone`): backbone that connects monomeric forms
bond (:obj:`Bond`): bonds between (backbones of) monomeric forms
circular (:obj:`bool`): if :obj:`True`, indicates that the biopolymer is circular
crosslinks (:obj:`BondSet`): crosslinking intrachain bonds
nicks (:obj:`NickSet`): set of nicks
features (:obj:`BpFormFeatureSet`): set of features
_parser (:obj:`lark.Lark`): parser
"""
DEFAULT_FASTA_CODE = '?'
def __init__(self, seq=None, alphabet=None, backbone=None, bond=None, circular=False,
crosslinks=None, nicks=None):
"""
Args:
seq (:obj:`MonomerSequence`, optional): sequence of monomeric forms of the biopolymer
alphabet (:obj:`Alphabet`, optional): alphabet of monomeric forms
backbone (:obj:`Backbone`, optional): backbone that connects monomeric forms
bond (:obj:`Bond`, optional): bonds between (backbones of) monomeric forms
circular (:obj:`bool`, optional): if :obj:`True`, indicates that the biopolymer is circular
crosslinks (:obj:`BondSet`, optional): crosslinking intrachain bonds
nicks (:obj:`NickSet`, optional): set of nicks
"""
if alphabet is None:
alphabet = Alphabet()
if backbone is None:
backbone = Backbone()
if bond is None:
bond = Bond()
if crosslinks is None:
crosslinks = BondSet()
if nicks is None:
nicks = NickSet()
self.seq = seq or MonomerSequence()
self.alphabet = alphabet
self.backbone = backbone
self.bond = bond
self.circular = circular
self.crosslinks = crosslinks
self.nicks = nicks
self.features = BpFormFeatureSet(self)
@property
def seq(self):
""" Get the sequence of monomeric forms
Returns:
:obj:`MonomerSequence`: sequence of monomeric forms
"""
return self._monomer_seq
@seq.setter
def seq(self, value):
""" Set the sequence of monomeric forms
Args:
value (:obj:`MonomerSequence`): sequence of monomeric forms
Raises:
:obj:`ValueError`: if `seq` is not an instance of `MonomerSequence`
"""
if value is None:
raise ValueError('`seq` must be an instance of `MonomerSequence`')
if not isinstance(value, MonomerSequence):
value = MonomerSequence(value)
self._monomer_seq = value
@property
def alphabet(self):
""" Get the alphabet
Returns:
:obj:`Alphabet`: alphabet
"""
return self._alphabet
@alphabet.setter
def alphabet(self, value):
""" Set the sequence of monomeric forms
Args:
value (:obj:`Alphabet`): alphabet
Raises:
:obj:`ValueError`: if `alphabet` is not an instance of `Alphabet`
"""
if not isinstance(value, Alphabet):
raise ValueError('`alphabet` must be an instance of `Alphabet`')
self._alphabet = value
@property
def backbone(self):
""" Get the backbones
Returns:
:obj:`Backbone`: backbones
"""
return self._backbone
@backbone.setter
def backbone(self, value):
""" Set the backbones
Args:
value (:obj:`Backbone`): backbones
"""
if not isinstance(value, Backbone):
raise ValueError('`backbone` must be an instance of `Backbone`')
self._backbone = value
@property
def bond(self):
""" Get the bonds
Returns:
:obj:`Bond`: bonds
"""
return self._bond
@bond.setter
def bond(self, value):
""" Set the bonds
Args:
value (:obj:`Bond`): bonds
"""
if not isinstance(value, Bond):
raise ValueError('`bond` must be an instance of `Bond`')
self._bond = value
@property
def circular(self):
""" Get the circularity
Returns:
:obj:`bool`: circularity
"""
return self._circular
@circular.setter
def circular(self, value):
""" Set the circularity
Args:
value (:obj:`bool`): circularity
"""
if not isinstance(value, bool):
raise ValueError('`circular` must be an instance of `bool`')
self._circular = value
@property
def crosslinks(self):
""" Get the crosslinking intrachain bonds
Returns:
:obj:`BondSet`: crosslinking intrachain bonds
"""
return self._crosslinks
@crosslinks.setter
def crosslinks(self, value):
""" Set the crosslinking intrachain bonds
Args:
value (:obj:`list` of :obj:`BondSet`): crosslinking intrachain bonds
"""
if not isinstance(value, BondSet):
raise ValueError('`crosslinks` must be an instance of `BondSet`')
self._crosslinks = value
@property
def nicks(self):
""" Get the nicks
Returns:
:obj:`NickSet`: nicks
"""
return self._nicks
@nicks.setter
def nicks(self, value):
""" Set the nicks
Args:
value (:obj:`list` of :obj:`NickSet`): nicks
"""
if not isinstance(value, NickSet):
raise ValueError('`nicks` must be an instance of `NickSet`')
self._nicks = value
@property
def features(self):
""" Get the features
Returns:
:obj:`BpFormFeatureSet`: features
"""
return self._features
@features.setter
def features(self, value):
""" Set the features
Args:
value (:obj:`BpFormFeatureSet`): features
"""
if not isinstance(value, BpFormFeatureSet):
raise ValueError('`features` must be an instance of `BpFormFeatureSet`')
if hasattr(self, '_features'):
raise ValueError('`features` cannot be set outside constructor')
self._features = value
def is_equal(self, other):
""" Check if two biopolymer forms are semantically equal
Args:
other (:obj:`BpForm`): another biopolymer form
Returns:
:obj:`bool`: :obj:`True`, if the objects have the same structure
"""
return self is other or (self.__class__ == other.__class__
and self.seq.is_equal(other.seq)
and self.alphabet.is_equal(other.alphabet)
and self.backbone.is_equal(other.backbone)
and self.bond.is_equal(other.bond)
and self.circular == other.circular
and self.crosslinks.is_equal(other.crosslinks)
and self.nicks.is_equal(other.nicks))
def diff(self, other):
""" Determine the semantic difference between two biopolymer forms
Args:
other (:obj:`BpForm`): another biopolymer form
Returns:
:obj:`str`: description of the semantic difference between
two biopolymer forms
"""
diff = []
# class
if self.__class__ != other.__class__:
diff.append('{} != {}'.format(
self.__class__.__name__,
other.__class__.__name__))
if diff:
return '\n'.join(diff)
# alphabet, backbone, inter-monomeric form bond
if not self.alphabet.is_equal(other.alphabet):
diff.append('Forms have different alphabets')
if (self.backbone is None and other.backbone is not None) or \
(self.backbone is not None and not self.backbone.is_equal(other.backbone)):
diff.append('Forms have different backbones')
if (self.bond is None and self.bond is not None) or \
(self.bond is not None and not self.bond.is_equal(other.bond)):
diff.append('Forms have different inter-monomer bonds')
if diff:
return '\n'.join(diff)
# sequence, crosslinks, nicks, circularity
if len(self.seq) != len(other.seq):
diff.append('Length {} != {}'.format(
len(self.seq), len(other.seq)))
else:
for i_monomer, (monomer, o_monomer) in enumerate(zip(
self.seq, other.seq)):
if not monomer.is_equal(o_monomer):
diff.append('Monomeric form {} {} != {}'.format(
i_monomer + 1, monomer, o_monomer))
if len(self.crosslinks) != len(other.crosslinks):
diff.append('Number of crosslinks {} != {}'.format(
len(self.crosslinks), len(other.crosslinks)))
else:
o_crosslinks = list(other.crosslinks)
for link in self.crosslinks:
match = False
for o_link in list(o_crosslinks):
if link.is_equal(o_link):
match = True
o_crosslinks.remove(o_link)
break
if not match:
diff.append('Crosslink {} not in other'.format(link))
for o_link in o_crosslinks:
diff.append('Crosslink {} not in self'.format(o_link))
if len(self.nicks) != len(other.nicks):
diff.append('Number of nicks {} != {}'.format(
len(self.nicks), len(other.nicks)))
else:
o_nicks = list(other.nicks)
for nick in self.nicks:
match = False
for o_nick in list(o_nicks):
if nick.is_equal(o_nick):
match = True
o_nicks.remove(o_nick)
break
if not match:
diff.append('Nick {} not in other'.format(nick.position))
for o_nick in o_nicks:
diff.append('Nick {} not in self'.format(o_nick._position))
if self.circular != other.circular:
diff.append('Circularity {} != {}'.format(
self.circular, other.circular))
if diff:
return '\n'.join(diff)
return None
def __getitem__(self, slice):
""" Get monomeric form(s) at slice
Args:
slice (:obj:`int` or :obj:`slice`): position(s)
Returns:
:obj:`Monomer` or :obj:`Monomers`: monomeric form(s)
"""
return self.seq.__getitem__(slice)
def __setitem__(self, slice, monomer):
""" Set monomeric form(s) at slice
Args:
slice (:obj:`int` or :obj:`slice`): position(s)
monomer (:obj:`Monomer` or :obj:`Monomers`): monomeric forms(s)
"""
self.seq.__setitem__(slice, monomer)
def __delitem__(self, slice):
""" Delete monomeric form(s) at slice
Args:
slice (:obj:`int` or :obj:`slice`): position(s)
"""
self.seq.__delitem__(slice)
def __iter__(self):
""" Get iterator over sequence of monomeric forms
Returns:
:obj:`iterator` of :obj:`Monomer`: iterator of monomeric forms
"""
return self.seq.__iter__()
def __reversed__(self):
""" Get reverse iterator over sequence of monomeric forms
Returns:
:obj:`iterator` of :obj:`Monomer`: iterator of monomeric forms
"""
return self.seq.__reversed__()
def __contains__(self, monomer):
""" Determine if a monomeric form is in the biopolymer form
Args:
monomer (:obj:`Monomer`): monomeric form
Returns:
:obj:`bool`: true if the monomeric form is in the sequence
"""
return self.seq.__contains__(monomer)
def __len__(self):
""" Get the length of the sequence of the form
Returns:
:obj:`int`: length
"""
return len(self.seq)
def validate(self):
""" Check that the biopolymer form is valid and return any errors
* Check that monomeric forms :math:`1 \ldots L-1` can bond to the right (their right bonding attributes are set)
* Check that monomeric forms :math:`2 \ldots L` can bond to the left (their left bonding attributes are set)
* No atom is involved in multiple bonds
Returns:
:obj:`list` of :obj:`str`: list of errors, if any
"""
errors = []
bonding_backbone_hydrogens = []
bonding_monomer_hydrogens = []
el_table = openbabel.OBElementTable()
def check_atom(molecule, atom_type, i_monomer, i_atom, structure, atom_md, bonding_hydrogens, el_table, errors):
if structure is None:
errors.append('Structure cannot be None')
return
atom_obj = structure.GetAtom(atom_md.position)
if atom_md.element == 'H':
atom_obj = get_hydrogen_atom(atom_obj, bonding_hydrogens, i_monomer)
if atom_obj:
if el_table.GetSymbol(atom_obj.GetAtomicNum()) != atom_md.element:
errors.append("{} atom '{}[{}]' must be {}".format(molecule, atom_type, i_atom, atom_md.element))
# check that bond atoms are defined
atom_types = ['monomer_bond_atoms', 'monomer_displaced_atoms']
for atom_type in atom_types:
for i_atom, atom in enumerate(getattr(self.backbone, atom_type)):
if atom.molecule != Backbone or not atom.element or not atom.position:
errors.append("Backbone atom '{}[{}]' must have a defined element and position".format(atom_type, i_atom))
else:
check_atom('Backbone', atom_type, None, i_atom, self.backbone.structure, atom,
bonding_backbone_hydrogens, el_table, errors)
atom_types = ['l_bond_atoms', 'l_displaced_atoms',
'r_bond_atoms', 'r_displaced_atoms']
for atom_type in atom_types:
if len(set(atom.molecule for atom in getattr(self.bond, atom_type))) > 1:
errors.append("'{}' must have the same molecule type".format(atom_type))
for i_atom, atom in enumerate(getattr(self.bond, atom_type)):
if not atom.element or (atom.molecule == Backbone and not atom.position):
errors.append("Bond atom '{}[{}]' must have a defined element and position".format(atom_type, i_atom))
elif atom.molecule == Backbone:
check_atom('Bond', atom_type, None, i_atom, self.backbone.structure, atom,
bonding_backbone_hydrogens, el_table, errors)
for i_monomer, monomer in enumerate(self.seq):
if not monomer.structure:
errors.append('Monomer {} must have a defined structure'.format(i_monomer + 1))
continue
atom_types = ['backbone_bond_atoms', 'backbone_displaced_atoms']
for atom_type in atom_types:
for i_atom, atom in enumerate(getattr(monomer, atom_type)):
if atom.molecule != Monomer or not atom.element or not atom.position:
errors.append("'{}[{}]' of monomer {} must have a defined element and position".format(
atom_type, i_atom, i_monomer + 1))
else:
check_atom('Monomer {}'.format(i_monomer + 1), atom_type, i_monomer, i_atom,
monomer.structure, atom, bonding_monomer_hydrogens, el_table, errors)
atom_types = ['r_bond_atoms', 'r_displaced_atoms',
'l_bond_atoms', 'l_displaced_atoms']
for atom_type in atom_types:
for i_atom, atom in enumerate(getattr(monomer, atom_type)):
if atom.molecule != Monomer or not atom.element or not atom.position:
errors.append("'{}[{}]' of monomer {} must have a defined element and position".format(
atom_type, i_atom, i_monomer + 1))
else:
check_atom('Monomer {}'.format(i_monomer + 1), atom_type, i_monomer, i_atom,
monomer.structure, atom, bonding_monomer_hydrogens, el_table, errors)
# crosslinks
for i_crosslink, crosslink in enumerate(self.crosslinks):
if isinstance(crosslink, OntoBond):
if self.seq[crosslink.l_monomer - 1] != crosslink.type.l_monomer:
errors.append('Monomer {} must be equal to the left monomer of crosslink {}'.format(
crosslink.l_monomer, onto_crosslink_to_id[crosslink.type]))
if self.seq[crosslink.r_monomer - 1] != crosslink.type.r_monomer:
errors.append('Monomer {} must be equal to the right monomer of crosslink {}'.format(
crosslink.r_monomer, onto_crosslink_to_id[crosslink.type]))
else:
atom_types = ['r_bond_atoms', 'r_displaced_atoms',
'l_bond_atoms', 'l_displaced_atoms']
for atom_type in atom_types:
for i_atom, atom in enumerate(getattr(crosslink, 'get_' + atom_type)()):
if atom.molecule != Monomer or not atom.monomer or not atom.element or not atom.position:
errors.append("'{}[{}]' of crosslink {} must have a defined monomer, element, and position".format(
atom_type, i_atom, i_crosslink + 1))
else:
check_atom('Crosslink {} - monomer {}'.format(i_crosslink, atom.monomer), atom_type, atom.monomer - 1, i_atom,
self.seq[atom.monomer - 1].structure, atom, bonding_monomer_hydrogens,
el_table, errors)
# nicks
max_nick_pos = len(self.seq) - 1
nick_positions = set()
for nick in self.nicks:
if nick.position > max_nick_pos:
errors.append('Nick position {} must be less than or equal to {}'.format(
nick.position, max_nick_pos))
nick_positions.add(nick.position)
if len(nick_positions) < len(self.nicks):
errors.append('Nick positions be unique')
# check that monomers 1 .. L-1 can bond to right (except at locations of nicks)
for i_monomer, monomer in enumerate(self.seq[0:-1]):
if (i_monomer + 1) not in nick_positions and not self.can_monomer_bond_right(monomer):
errors.append('Monomeric form {} must be able to bond to the right'.format(i_monomer + 1))
if self.circular and not self.can_monomer_bond_right(self.seq[-1]):
errors.append('Monomeric form {} must be able to bond to the right'.format(len(self.seq)))
# check that monomers 2 .. L can bond to left (except at locations of nicks)
for i_monomer, monomer in enumerate(self.seq[1:]):
if (i_monomer + 1) not in nick_positions and not self.can_monomer_bond_left(monomer):
errors.append('Monomeric form {} must be able to bond to the left'.format(i_monomer + 2))
if self.circular and not self.can_monomer_bond_left(self.seq[0]):
errors.append('Monomeric form {} must be able to bond to the left'.format(1))
# left/right backbone/monomer atoms same length
if len(self.bond.l_bond_atoms) != len(self.bond.r_bond_atoms):
errors.append('Number or left and right bond atoms must be equal')
n_bond_left_atoms = sum([1 for atom in self.bond.l_bond_atoms if atom.molecule == Backbone])
n_bond_right_atoms = sum([1 for atom in self.bond.r_bond_atoms if atom.molecule == Backbone])
for i_monomer, monomer in enumerate(self.seq):
if len(self.backbone.monomer_bond_atoms) < len(monomer.backbone_bond_atoms):
errors.append(('Number of monomer-backbone atoms for monomer {} '
'must be less than or equal to the number of backbone-monomer atoms').format(i_monomer + 1))
for i_monomer, (monomer_l, monomer_r) in enumerate(zip(self.seq[0:-1], self.seq[1:])):
if len(monomer_l.r_bond_atoms) + n_bond_right_atoms != len(monomer_r.l_bond_atoms) + n_bond_left_atoms:
errors.append('Number of right and left bond atoms must be equal for monomers {}-{}'.format(i_monomer + 1, i_monomer + 2))
if self.circular and \
len(self.seq[-1].r_bond_atoms) + n_bond_right_atoms != len(self.seq[0].l_bond_atoms) + n_bond_left_atoms:
errors.append('Number of right and left bond atoms must be equal for monomers {}, {}'.format(len(self.seq), 1))
for i_crosslink, crosslink in enumerate(self.crosslinks):
if len(crosslink.get_l_bond_atoms()) != len(crosslink.get_r_bond_atoms()):
errors.append('Number of right and left bond atoms must be equal for crosslink {}'.format(i_crosslink + 1))
crosslinks = list(self.crosslinks)
for crosslink in crosslinks:
for other_crosslink in crosslinks[1:]:
if crosslink.is_equal(other_crosslink):
errors.append('Crosslink {} cannot be repeated'.format(str(crosslink)))
# uncertainty
for i_monomer, monomer in enumerate(self.seq):
if monomer.start_position is not None and monomer.start_position > len(self.seq):
errors.append('Start position for monomer {} must be less than the length of the sequence {}'.format(
i_monomer, len(self.seq)))
if monomer.end_position is not None and monomer.end_position > len(self.seq):
errors.append('End position for monomer {} must be less than the length of the sequence {}'.format(
i_monomer, len(self.seq)))
if monomer.start_position is not None and \
monomer.end_position is not None and \
monomer.start_position > monomer.end_position:
errors.append('Start position {} for monomer {} must be less than or equal to the end position {}'.format(
monomer.start_position, i_monomer, monomer.end_position))
# return errors
return errors
def can_monomer_bond_left(self, monomer):
""" Check if monomeric form can bond to the left
Args:
monomer (:obj:`Monomer`): monomeric form
Returns:
:obj:`bool`: :obj:`True`, if the monomeric form can bond to the left
"""
return len(monomer.l_bond_atoms) > 0 or \
(len(monomer.backbone_bond_atoms) > 0
and len(self.backbone.monomer_bond_atoms) > 0
and len(self.bond.l_bond_atoms) > 0
and self.bond.l_bond_atoms[0].molecule == Backbone)
def can_monomer_bond_right(self, monomer):
""" Check if monomeric form can bond to right
Args:
monomer (:obj:`Monomer`): monomeric form
Returns:
:obj:`bool`: :obj:`True`, if the monomeric form can bond to the right
"""
return len(monomer.r_bond_atoms) > 0 or \
(len(monomer.backbone_bond_atoms) > 0
and len(self.backbone.monomer_bond_atoms) > 0
and len(self.bond.r_bond_atoms) > 0
and self.bond.r_bond_atoms[0].molecule == Backbone)
def get_monomer_counts(self):
""" Get the frequency of each monomeric form within the biopolymer
Returns:
:obj:`dict`: dictionary that maps monomeric forms to their counts
"""
return self.seq.get_monomer_counts()
def get_major_micro_species(self, ph, major_tautomer=False, dearomatize=False):
""" Get the major protonation and tautomerization state
Args:
ph (:obj:`float`): pH
major_tautomer (:obj:`bool`, optional): if :obj:`True`, calculate the major tautomer
dearomatize (:obj:`bool`, optional): if :obj:`True`, dearomatize molecule
Returns:
:obj:`openbabel.OBMol`: major protonation and tautomerization state
"""
if not self.seq: