Skip to content

Commit

Permalink
implement draw_xlink
Browse files Browse the repository at this point in the history
  • Loading branch information
xzheng902 committed Aug 7, 2019
1 parent bc5ca83 commit a77a291
Show file tree
Hide file tree
Showing 4 changed files with 122 additions and 17 deletions.
115 changes: 106 additions & 9 deletions bcforms/core.py
Expand Up @@ -15,10 +15,9 @@
import pkg_resources
from ruamel import yaml
import wc_utils.cache
from wc_utils.util.chem import EmpiricalFormula, OpenBabelUtils
from wc_utils.util.chem import EmpiricalFormula, OpenBabelUtils, draw_molecule
import bpforms
import bpforms.core
import bpforms.alphabet.protein

# setup cache
cache_dir = os.path.expanduser('~/.cache/bcforms')
Expand Down Expand Up @@ -965,7 +964,6 @@ class AbstractedCrosslink(Crosslink):
xlink_details (:obj:`tuple`): detailed information about the abstracted crosslink
_xlink_atom_parser (:obj:`lark.Lark`): lark grammar parser used to parse atom strings
_xlink_filename (obj:`str`): path to the xlink yml file
"""

Expand Down Expand Up @@ -1931,6 +1929,15 @@ def get_structure(self):
atom_type[i_atom] = atom+n_atoms[i_subunit]
atom_maps.append(atom_map)

print(atom_maps)

for atom_map in atom_maps:
for subunit_map in atom_map.values():
for monomer in subunit_map.values():
for atom_type in monomer.values():
for i_atom, atom in atom_type.items():
atom_type[i_atom] = mol.GetAtom(atom)

# mol.AddHydrogens()

# print(atom_maps)
Expand All @@ -1949,21 +1956,24 @@ def get_structure(self):
for atom_md in getattr(crosslink, 'get_'+atom_type)():
i_subunit = [i for i in range(len(self.subunits)) if self.subunits[i].id == atom_md.subunit][0]
subunit_idx = 1 if atom_md.subunit_idx is None else atom_md.subunit_idx
atom = mol.GetAtom(atom_maps[i_subunit][subunit_idx][atom_md.monomer][atom_md.component_type][atom_md.position])
atom = atom_maps[i_subunit][subunit_idx][atom_md.monomer][atom_md.component_type][atom_md.position]
if atom_md.element == 'H' and atom.GetAtomicNum() != 1:
atom = get_hydrogen_atom(atom, bonding_hydrogens, (i_subunit, subunit_idx-1, atom_md.monomer-1, atom_md.component_type))
crosslink_atoms[atom_type].append((atom, atom_md.charge))
crosslink_atoms[atom_type].append((atom, i_subunit, subunit_idx, atom_md.monomer, atom_md.position, atom_md.charge))

# print(OpenBabelUtils.export(mol, format='smiles', options=[]))

# make the crosslink bonds
for atoms in crosslinks_atoms:

for atom, atom_charge in itertools.chain(atoms['l_displaced_atoms'], atoms['r_displaced_atoms']):
for atom, i_subunit, subunit_idx, i_monomer, i_position, atom_charge in itertools.chain(atoms['l_displaced_atoms'], atoms['r_displaced_atoms']):
if atom:
atom_2 = atom_maps[i_subunit][subunit_idx][i_monomer]['monomer'].get(i_position, None)
if atom_2 and atom_2.GetIdx() == atom.GetIdx():
atom_maps[i_subunit][subunit_idx][i_monomer]['monomer'].pop(i_position)
assert mol.DeleteAtom(atom, True)

for (l_atom, l_atom_charge), (r_atom, r_atom_charge) in zip(atoms['l_bond_atoms'], atoms['r_bond_atoms']):
for (l_atom, _, _, _, _, l_atom_charge), (r_atom, _, _, _, _, r_atom_charge) in zip(atoms['l_bond_atoms'], atoms['r_bond_atoms']):
bond = openbabel.OBBond()
bond.SetBegin(l_atom)
bond.SetEnd(r_atom)
Expand All @@ -1976,7 +1986,14 @@ def get_structure(self):
if r_atom_charge:
r_atom.SetFormalCharge(r_atom.GetFormalCharge() + r_atom_charge)

return mol
for atom_map in atom_maps:
for subunit_map in atom_map.values():
for monomer in subunit_map.values():
for atom_type in monomer.values():
for i_atom, atom in atom_type.items():
atom_type[i_atom] = atom.GetIdx()

return mol, atom_maps

def export(self, format='smiles', options=[]):
""" Export the structure to string
Expand All @@ -1989,7 +2006,7 @@ def export(self, format='smiles', options=[]):
:obj:`str`: exported string representation of the structure
"""
return OpenBabelUtils.export(self.get_structure(), format=format, options=options)
return OpenBabelUtils.export(self.get_structure()[0], format=format, options=options)


def get_hydrogen_atom(parent_atom, bonding_hydrogens, i_monomer):
Expand All @@ -2008,3 +2025,83 @@ def get_hydrogen_atom(parent_atom, bonding_hydrogens, i_monomer):
bonding_hydrogens.append(tmp)
return other_atom
return None

def draw_xlink(xlink_name):
""" return png of crosslink for webpage
Args:
xlink_name (:obj:`str`): name of xlink
Returns:
:obj:`object`: image
Raises:
:obj:`KeyError`: Unknown crosslink id
:obj:`ValueError`: Unknown monomer alphabet
"""
xlink_detail_dict = parse_yaml(_xlink_filename)
if xlink_name in xlink_detail_dict:
xlink_details = xlink_detail_dict[xlink_name]
else:
raise KeyError('Unknown crosslink id')

# create the bcform
form = BcForm()

l_monomer_alphabet = xlink_details['l_monomer_alphabet']
if l_monomer_alphabet == 'bpforms.ProteinForm':
l_monomer = bpforms.ProteinForm().from_str(xlink_details['l_monomer'])
elif l_monomer_alphabet == 'bpforms.DnaForm':
l_monomer = bpforms.DnaForm().from_str(xlink_details['l_monomer'])
elif l_monomer_alphabet == 'bpforms.RnaForm':
l_monomer = bpforms.RnaForm().from_str(xlink_details['l_monomer'])
else:
raise ValueError('Unknown monomer alphabet')

r_monomer_alphabet = xlink_details['r_monomer_alphabet']
if r_monomer_alphabet == 'bpforms.ProteinForm':
r_monomer = bpforms.ProteinForm().from_str(xlink_details['r_monomer'])
elif r_monomer_alphabet == 'bpforms.DnaForm':
r_monomer = bpforms.DnaForm().from_str(xlink_details['r_monomer'])
elif r_monomer_alphabet == 'bpforms.RnaForm':
r_monomer = bpforms.RnaForm().from_str(xlink_details['r_monomer'])
else:
raise ValueError('Unknown monomer alphabet')

form.subunits.append(Subunit(id='l', stoichiometry=1, structure=l_monomer))
form.subunits.append(Subunit(id='r', stoichiometry=1, structure=r_monomer))
form.crosslinks.append(AbstractedCrosslink(type=xlink_name, l_subunit='l', l_monomer=1, r_subunit='r', r_monomer=1))

cml = form.export(format='cml')
structure, atom_maps = form.get_structure()

el_table = openbabel.OBElementTable()

atom_labels = []
i_atom = 1
atom_labels.append({'position': i_atom,
'element': el_table.GetSymbol(structure.GetAtom(i_atom).GetAtomicNum()),
'label': xlink_details['l_monomer'],
'color': 0x000000})

i_atom = structure.NumAtoms()
atom_labels.append({'position': i_atom,
'element': el_table.GetSymbol(structure.GetAtom(i_atom).GetAtomicNum()),
'label': xlink_details['r_monomer'],
'color': 0x000000})

i_l_atom = atom_maps[0][1][1]['monomer'][form.crosslinks[0].get_l_bond_atoms()[0].position]
i_r_atom = atom_maps[1][1][1]['monomer'][form.crosslinks[0].get_r_bond_atoms()[0].position]

bond_sets=[{
'positions': [[i_l_atom, i_r_atom]],
'elements': [[
el_table.GetSymbol(structure.GetAtom(i_l_atom).GetAtomicNum()),
el_table.GetSymbol(structure.GetAtom(i_r_atom).GetAtomicNum()),
]],
'color': 0xff0000,
}]


return draw_molecule(cml, 'cml', image_format='png',
atom_labels=atom_labels, atom_sets=[], bond_sets=bond_sets,
show_atom_nums=False,
width=300, height=200, include_xml_header=False)
4 changes: 2 additions & 2 deletions bcforms/xlink/xlink.yml
Expand Up @@ -567,9 +567,9 @@ V,Y:
l_monomer_alphabet: bpforms.ProteinForm
l_monomer: V
l_bond_atoms:
- C11
- C9
l_displaced_atoms:
- H11
- H9
r_monomer_alphabet: bpforms.ProteinForm
r_monomer: Y
r_bond_atoms:
Expand Down
Binary file modified bcforms/xlink/xlink_simple.xlsx
Binary file not shown.
20 changes: 14 additions & 6 deletions tests/test_core.py
Expand Up @@ -849,38 +849,38 @@ def test_get_structure(self):
bc_form_1 = core.BcForm().from_str('2*a')
self.assertTrue(len(bc_form_1.validate())==0)
bc_form_1.set_subunit_attribute('a', 'structure', bpforms.ProteinForm().from_str('A'))
self.assertEqual(OpenBabelUtils.export(bc_form_1.get_structure(), 'smiles', options=[]), 'C[C@H]([NH3+])C(=O)O.C[C@H]([NH3+])C(=O)O')
self.assertEqual(OpenBabelUtils.export(bc_form_1.get_structure()[0], 'smiles', options=[]), 'C[C@H]([NH3+])C(=O)O.C[C@H]([NH3+])C(=O)O')

# mini "homodimer" AA
# linking C[C@H]([NH3+])C(=O)[O-] and C[C@H]([NH3+])C(=O)[O-]
bc_form_2 = core.BcForm().from_str('2*a | x-link: [l-bond-atom: a(1)-1C8 | l-displaced-atom: a(1)-1O10 | l-displaced-atom: a(1)-1H10 | r-bond-atom: a(2)-1N4-1 | r-displaced-atom: a(2)-1H4+1 | r-displaced-atom: a(2)-1H4]')
self.assertTrue(len(bc_form_2.validate())==0)
bc_form_2.set_subunit_attribute('a', 'structure', bpforms.ProteinForm().from_str('A'))
self.assertEqual(OpenBabelUtils.export(bc_form_2.get_structure(), 'smiles', options=[]), 'C[C@H]([NH3+])C(=O)N[C@@H](C)C(=O)O')
self.assertEqual(OpenBabelUtils.export(bc_form_2.get_structure()[0], 'smiles', options=[]), 'C[C@H]([NH3+])C(=O)N[C@@H](C)C(=O)O')

# mini "heterodimer" AG
# linking C[C@H]([NH3+])C(=O)[O-] and C([NH3+])C(=O)[O-]
bc_form_3 = core.BcForm().from_str('a+g | x-link: [l-bond-atom: a-1C8 | l-displaced-atom: a-1O10 | l-displaced-atom: a(1)-1H10 | r-bond-atom: g-1N5-1 | r-displaced-atom: g-1H5+1 | r-displaced-atom: g-1H5]')
self.assertTrue(len(bc_form_3.validate())==0)
bc_form_3.set_subunit_attribute('a', 'structure', bpforms.ProteinForm().from_str('A'))
bc_form_3.set_subunit_attribute('g', 'structure', bpforms.ProteinForm().from_str('G'))
self.assertEqual(OpenBabelUtils.export(bc_form_3.get_structure(), 'smiles', options=[]), 'C[C@H]([NH3+])C(=O)NCC(=O)O')
self.assertEqual(OpenBabelUtils.export(bc_form_3.get_structure()[0], 'smiles', options=[]), 'C[C@H]([NH3+])C(=O)NCC(=O)O')

# a more realistic example AGGA, where subunits are composed of multiple monomers
# linking C[C@H]([NH3+])C(=O)NCC(=O)[O-] and C([NH3+])C(=O)N[C@@H](C)C(=O)[O-]
bc_form_4 = core.BcForm().from_str('ag+ga | x-link: [l-bond-atom: ag-2C2 | l-displaced-atom: ag-2O1 | l-displaced-atom: ag-2H1 | r-bond-atom: ga-1N5-1 | r-displaced-atom: ga-1H5+1 | r-displaced-atom: ga-1H5]')
self.assertTrue(len(bc_form_4.validate())==0)
bc_form_4.set_subunit_attribute('ag', 'structure', bpforms.ProteinForm().from_str('AG'))
bc_form_4.set_subunit_attribute('ga', 'structure', bpforms.ProteinForm().from_str('GA'))
self.assertEqual(OpenBabelUtils.export(bc_form_4.get_structure(), 'smiles', options=[]), 'C[C@H]([NH3+])C(=O)NCC(=O)NCC(=O)N[C@@H](C)C(=O)O')
self.assertEqual(OpenBabelUtils.export(bc_form_4.get_structure()[0], 'smiles', options=[]), 'C[C@H]([NH3+])C(=O)NCC(=O)NCC(=O)N[C@@H](C)C(=O)O')

# a more realistic example ACCMGAGA, where subunits are composed of multiple monomers
# linking ACCM and 2*GA
bc_form_5 = core.BcForm().from_str('accm+2*ga | x-link: [l-bond-atom: accm-4C11 | l-displaced-atom: accm-4O13 | l-displaced-atom: accm-4H13 | r-bond-atom: ga(1)-1N5-1 | r-displaced-atom: ga(1)-1H5+1 | r-displaced-atom: ga(1)-1H5] | x-link: [l-bond-atom: ga(1)-2C8 | l-displaced-atom: ga(1)-2O10 | l-displaced-atom: ga(1)-2H10 | r-bond-atom: ga(2)-1N5-1 | r-displaced-atom: ga(2)-1H5+1 | r-displaced-atom: ga(2)-1H5]')
self.assertTrue(len(bc_form_5.validate())==0)
bc_form_5.set_subunit_attribute('accm', 'structure', bpforms.ProteinForm().from_str('ACCM'))
bc_form_5.set_subunit_attribute('ga', 'structure', bpforms.ProteinForm().from_str('GA'))
self.assertEqual(OpenBabelUtils.export(bc_form_5.get_structure(), 'smiles', options=['canonical']), OpenBabelUtils.export(bpforms.ProteinForm().from_str('ACCMGAGA').get_structure()[0], 'smiles', options=['canonical']))
self.assertEqual(OpenBabelUtils.export(bc_form_5.get_structure()[0], 'smiles', options=['canonical']), OpenBabelUtils.export(bpforms.ProteinForm().from_str('ACCMGAGA').get_structure()[0], 'smiles', options=['canonical']))

# mini "heterodimer" + small molecule AG+CH4
bc_form_6 = core.BcForm().from_str('a+g+small | x-link: [l-bond-atom: a-1C8 | l-displaced-atom: a-1O10 | l-displaced-atom: a-1H10 | r-bond-atom: g-1N5-1 | r-displaced-atom: g-1H5+1 | r-displaced-atom: g-1H5] | x-link: [l-bond-atom: g-1C4 | l-displaced-atom: g-1H4 | r-bond-atom: small-1C1 | r-displaced-atom: small-1H1 ]')
Expand All @@ -892,11 +892,19 @@ def test_get_structure(self):
conversion.SetInFormat('smi')
conversion.ReadString(ob_mol, 'C')
bc_form_6.set_subunit_attribute('small', 'structure', ob_mol)
self.assertEqual(OpenBabelUtils.export(bc_form_6.get_structure(), 'smiles', options=['canonical']), OpenBabelUtils.export(bpforms.ProteinForm().from_str('AA').get_structure()[0], 'smiles', options=['canonical']))
self.assertEqual(OpenBabelUtils.export(bc_form_6.get_structure()[0], 'smiles', options=['canonical']), OpenBabelUtils.export(bpforms.ProteinForm().from_str('AA').get_structure()[0], 'smiles', options=['canonical']))

def test_export(self):
bc_form_3 = core.BcForm().from_str('a+g | x-link: [l-bond-atom: a-1C8 | l-displaced-atom: a-1O10 | l-displaced-atom: a(1)-1H10 | r-bond-atom: g-1N5-1 | r-displaced-atom: g-1H5+1 | r-displaced-atom: g-1H5]')
self.assertTrue(len(bc_form_3.validate())==0)
bc_form_3.set_subunit_attribute('a', 'structure', bpforms.ProteinForm().from_str('A'))
bc_form_3.set_subunit_attribute('g', 'structure', bpforms.ProteinForm().from_str('G'))
self.assertEqual(bc_form_3.export(), 'C[C@H]([NH3+])C(=O)NCC(=O)O')

class MiscellaneousTestCase(unittest.TestCase):

def test_draw_xlink(self):

xlinks = list(core.parse_yaml(core._xlink_filename).keys())
for xlink in xlinks:
img = core.draw_xlink(xlink)

0 comments on commit a77a291

Please sign in to comment.