implement draw_xlink

KarrLab · Aug 7, 2019 · a77a291 · a77a291
1 parent bc5ca83
commit a77a291
Show file tree

Hide file tree

Showing 4 changed files with 122 additions and 17 deletions.
diff --git a/bcforms/core.py b/bcforms/core.py
@@ -15,10 +15,9 @@
 import pkg_resources
 from ruamel import yaml
 import wc_utils.cache
-from wc_utils.util.chem import EmpiricalFormula, OpenBabelUtils
+from wc_utils.util.chem import EmpiricalFormula, OpenBabelUtils, draw_molecule
 import bpforms
 import bpforms.core
-import bpforms.alphabet.protein
 
 # setup cache
 cache_dir = os.path.expanduser('~/.cache/bcforms')
@@ -965,7 +964,6 @@ class AbstractedCrosslink(Crosslink):
         xlink_details (:obj:`tuple`): detailed information about the abstracted crosslink
 
         _xlink_atom_parser (:obj:`lark.Lark`): lark grammar parser used to parse atom strings
-        _xlink_filename (obj:`str`): path to the xlink yml file
 
     """
 
@@ -1931,6 +1929,15 @@ def get_structure(self):
                             atom_type[i_atom] = atom+n_atoms[i_subunit]
             atom_maps.append(atom_map)
 
+        print(atom_maps)
+
+        for atom_map in atom_maps:
+            for subunit_map in atom_map.values():
+                for monomer in subunit_map.values():
+                    for atom_type in monomer.values():
+                        for i_atom, atom in atom_type.items():
+                            atom_type[i_atom] = mol.GetAtom(atom)
+
         # mol.AddHydrogens()
 
         # print(atom_maps)
@@ -1949,21 +1956,24 @@ def get_structure(self):
                 for atom_md in getattr(crosslink, 'get_'+atom_type)():
                     i_subunit = [i for i in range(len(self.subunits)) if self.subunits[i].id == atom_md.subunit][0]
                     subunit_idx = 1 if atom_md.subunit_idx is None else atom_md.subunit_idx
-                    atom = mol.GetAtom(atom_maps[i_subunit][subunit_idx][atom_md.monomer][atom_md.component_type][atom_md.position])
+                    atom = atom_maps[i_subunit][subunit_idx][atom_md.monomer][atom_md.component_type][atom_md.position]
                     if atom_md.element == 'H' and atom.GetAtomicNum() != 1:
                         atom = get_hydrogen_atom(atom, bonding_hydrogens, (i_subunit, subunit_idx-1, atom_md.monomer-1, atom_md.component_type))
-                    crosslink_atoms[atom_type].append((atom, atom_md.charge))
+                    crosslink_atoms[atom_type].append((atom, i_subunit, subunit_idx, atom_md.monomer, atom_md.position, atom_md.charge))
 
         # print(OpenBabelUtils.export(mol, format='smiles', options=[]))
 
         # make the crosslink bonds
         for atoms in crosslinks_atoms:
 
-            for atom, atom_charge in itertools.chain(atoms['l_displaced_atoms'], atoms['r_displaced_atoms']):
+            for atom, i_subunit, subunit_idx, i_monomer, i_position, atom_charge in itertools.chain(atoms['l_displaced_atoms'], atoms['r_displaced_atoms']):
                 if atom:
+                    atom_2 = atom_maps[i_subunit][subunit_idx][i_monomer]['monomer'].get(i_position, None)
+                    if atom_2 and atom_2.GetIdx() == atom.GetIdx():
+                        atom_maps[i_subunit][subunit_idx][i_monomer]['monomer'].pop(i_position)
                     assert mol.DeleteAtom(atom, True)
 
-            for (l_atom, l_atom_charge), (r_atom, r_atom_charge) in zip(atoms['l_bond_atoms'], atoms['r_bond_atoms']):
+            for (l_atom, _, _, _, _, l_atom_charge), (r_atom, _, _, _, _, r_atom_charge) in zip(atoms['l_bond_atoms'], atoms['r_bond_atoms']):
                 bond = openbabel.OBBond()
                 bond.SetBegin(l_atom)
                 bond.SetEnd(r_atom)
@@ -1976,7 +1986,14 @@ def get_structure(self):
                 if r_atom_charge:
                     r_atom.SetFormalCharge(r_atom.GetFormalCharge() + r_atom_charge)
 
-        return mol
+        for atom_map in atom_maps:
+            for subunit_map in atom_map.values():
+                for monomer in subunit_map.values():
+                    for atom_type in monomer.values():
+                        for i_atom, atom in atom_type.items():
+                            atom_type[i_atom] = atom.GetIdx()
+
+        return mol, atom_maps
 
     def export(self, format='smiles', options=[]):
         """ Export the structure to string
@@ -1989,7 +2006,7 @@ def export(self, format='smiles', options=[]):
             :obj:`str`: exported string representation of the structure
 
         """
-        return OpenBabelUtils.export(self.get_structure(), format=format, options=options)
+        return OpenBabelUtils.export(self.get_structure()[0], format=format, options=options)
 
 
 def get_hydrogen_atom(parent_atom, bonding_hydrogens, i_monomer):
@@ -2008,3 +2025,83 @@ def get_hydrogen_atom(parent_atom, bonding_hydrogens, i_monomer):
                 bonding_hydrogens.append(tmp)
                 return other_atom
     return None
+
+def draw_xlink(xlink_name):
+    """ return png of crosslink for webpage
+
+    Args:
+        xlink_name (:obj:`str`): name of xlink
+    Returns:
+        :obj:`object`: image
+    Raises:
+        :obj:`KeyError`: Unknown crosslink id
+        :obj:`ValueError`: Unknown monomer alphabet
+    """
+    xlink_detail_dict = parse_yaml(_xlink_filename)
+    if xlink_name in xlink_detail_dict:
+        xlink_details = xlink_detail_dict[xlink_name]
+    else:
+        raise KeyError('Unknown crosslink id')
+
+    # create the bcform
+    form = BcForm()
+
+    l_monomer_alphabet = xlink_details['l_monomer_alphabet']
+    if l_monomer_alphabet == 'bpforms.ProteinForm':
+        l_monomer = bpforms.ProteinForm().from_str(xlink_details['l_monomer'])
+    elif l_monomer_alphabet == 'bpforms.DnaForm':
+        l_monomer = bpforms.DnaForm().from_str(xlink_details['l_monomer'])
+    elif l_monomer_alphabet == 'bpforms.RnaForm':
+        l_monomer = bpforms.RnaForm().from_str(xlink_details['l_monomer'])
+    else:
+        raise ValueError('Unknown monomer alphabet')
+
+    r_monomer_alphabet = xlink_details['r_monomer_alphabet']
+    if r_monomer_alphabet == 'bpforms.ProteinForm':
+        r_monomer = bpforms.ProteinForm().from_str(xlink_details['r_monomer'])
+    elif r_monomer_alphabet == 'bpforms.DnaForm':
+        r_monomer = bpforms.DnaForm().from_str(xlink_details['r_monomer'])
+    elif r_monomer_alphabet == 'bpforms.RnaForm':
+        r_monomer = bpforms.RnaForm().from_str(xlink_details['r_monomer'])
+    else:
+        raise ValueError('Unknown monomer alphabet')
+
+    form.subunits.append(Subunit(id='l', stoichiometry=1, structure=l_monomer))
+    form.subunits.append(Subunit(id='r', stoichiometry=1, structure=r_monomer))
+    form.crosslinks.append(AbstractedCrosslink(type=xlink_name, l_subunit='l', l_monomer=1, r_subunit='r', r_monomer=1))
+
+    cml = form.export(format='cml')
+    structure, atom_maps = form.get_structure()
+
+    el_table = openbabel.OBElementTable()
+
+    atom_labels = []
+    i_atom = 1
+    atom_labels.append({'position': i_atom,
+                'element': el_table.GetSymbol(structure.GetAtom(i_atom).GetAtomicNum()),
+                'label': xlink_details['l_monomer'],
+                'color': 0x000000})
+
+    i_atom = structure.NumAtoms()
+    atom_labels.append({'position': i_atom,
+                'element': el_table.GetSymbol(structure.GetAtom(i_atom).GetAtomicNum()),
+                'label': xlink_details['r_monomer'],
+                'color': 0x000000})
+
+    i_l_atom = atom_maps[0][1][1]['monomer'][form.crosslinks[0].get_l_bond_atoms()[0].position]
+    i_r_atom = atom_maps[1][1][1]['monomer'][form.crosslinks[0].get_r_bond_atoms()[0].position]
+
+    bond_sets=[{
+        'positions': [[i_l_atom, i_r_atom]],
+        'elements': [[
+                    el_table.GetSymbol(structure.GetAtom(i_l_atom).GetAtomicNum()),
+                    el_table.GetSymbol(structure.GetAtom(i_r_atom).GetAtomicNum()),
+            ]],
+        'color': 0xff0000,
+        }]
+
+
+    return draw_molecule(cml, 'cml', image_format='png',
+                             atom_labels=atom_labels, atom_sets=[], bond_sets=bond_sets,
+                             show_atom_nums=False,
+                             width=300, height=200, include_xml_header=False)
diff --git a/bcforms/xlink/xlink.yml b/bcforms/xlink/xlink.yml
@@ -567,9 +567,9 @@ V,Y:
   l_monomer_alphabet: bpforms.ProteinForm
   l_monomer: V
   l_bond_atoms:
-  - C11
+  - C9
   l_displaced_atoms:
-  - H11
+  - H9
   r_monomer_alphabet: bpforms.ProteinForm
   r_monomer: Y
   r_bond_atoms:

diff --git a/bcforms/xlink/xlink_simple.xlsx b/bcforms/xlink/xlink_simple.xlsx
diff --git a/tests/test_core.py b/tests/test_core.py
@@ -849,38 +849,38 @@ def test_get_structure(self):
         bc_form_1 = core.BcForm().from_str('2*a')
         self.assertTrue(len(bc_form_1.validate())==0)
         bc_form_1.set_subunit_attribute('a', 'structure', bpforms.ProteinForm().from_str('A'))
-        self.assertEqual(OpenBabelUtils.export(bc_form_1.get_structure(), 'smiles', options=[]), 'C[C@H]([NH3+])C(=O)O.C[C@H]([NH3+])C(=O)O')
+        self.assertEqual(OpenBabelUtils.export(bc_form_1.get_structure()[0], 'smiles', options=[]), 'C[C@H]([NH3+])C(=O)O.C[C@H]([NH3+])C(=O)O')
 
         # mini "homodimer" AA
         # linking C[C@H]([NH3+])C(=O)[O-] and C[C@H]([NH3+])C(=O)[O-]
         bc_form_2 = core.BcForm().from_str('2*a | x-link: [l-bond-atom: a(1)-1C8 | l-displaced-atom: a(1)-1O10 | l-displaced-atom: a(1)-1H10 | r-bond-atom: a(2)-1N4-1 | r-displaced-atom: a(2)-1H4+1 | r-displaced-atom: a(2)-1H4]')
         self.assertTrue(len(bc_form_2.validate())==0)
         bc_form_2.set_subunit_attribute('a', 'structure', bpforms.ProteinForm().from_str('A'))
-        self.assertEqual(OpenBabelUtils.export(bc_form_2.get_structure(), 'smiles', options=[]), 'C[C@H]([NH3+])C(=O)N[C@@H](C)C(=O)O')
+        self.assertEqual(OpenBabelUtils.export(bc_form_2.get_structure()[0], 'smiles', options=[]), 'C[C@H]([NH3+])C(=O)N[C@@H](C)C(=O)O')
 
         # mini "heterodimer" AG
         # linking C[C@H]([NH3+])C(=O)[O-] and C([NH3+])C(=O)[O-]
         bc_form_3 = core.BcForm().from_str('a+g | x-link: [l-bond-atom: a-1C8 | l-displaced-atom: a-1O10 | l-displaced-atom: a(1)-1H10 | r-bond-atom: g-1N5-1 | r-displaced-atom: g-1H5+1 | r-displaced-atom: g-1H5]')
         self.assertTrue(len(bc_form_3.validate())==0)
         bc_form_3.set_subunit_attribute('a', 'structure', bpforms.ProteinForm().from_str('A'))
         bc_form_3.set_subunit_attribute('g', 'structure', bpforms.ProteinForm().from_str('G'))
-        self.assertEqual(OpenBabelUtils.export(bc_form_3.get_structure(), 'smiles', options=[]), 'C[C@H]([NH3+])C(=O)NCC(=O)O')
+        self.assertEqual(OpenBabelUtils.export(bc_form_3.get_structure()[0], 'smiles', options=[]), 'C[C@H]([NH3+])C(=O)NCC(=O)O')
 
         # a more realistic example AGGA, where subunits are composed of multiple monomers
         # linking C[C@H]([NH3+])C(=O)NCC(=O)[O-] and C([NH3+])C(=O)N[C@@H](C)C(=O)[O-]
         bc_form_4 = core.BcForm().from_str('ag+ga | x-link: [l-bond-atom: ag-2C2 | l-displaced-atom: ag-2O1 | l-displaced-atom: ag-2H1 | r-bond-atom: ga-1N5-1 | r-displaced-atom: ga-1H5+1 | r-displaced-atom: ga-1H5]')
         self.assertTrue(len(bc_form_4.validate())==0)
         bc_form_4.set_subunit_attribute('ag', 'structure', bpforms.ProteinForm().from_str('AG'))
         bc_form_4.set_subunit_attribute('ga', 'structure', bpforms.ProteinForm().from_str('GA'))
-        self.assertEqual(OpenBabelUtils.export(bc_form_4.get_structure(), 'smiles', options=[]), 'C[C@H]([NH3+])C(=O)NCC(=O)NCC(=O)N[C@@H](C)C(=O)O')
+        self.assertEqual(OpenBabelUtils.export(bc_form_4.get_structure()[0], 'smiles', options=[]), 'C[C@H]([NH3+])C(=O)NCC(=O)NCC(=O)N[C@@H](C)C(=O)O')
 
         # a more realistic example ACCMGAGA, where subunits are composed of multiple monomers
         # linking ACCM and 2*GA
         bc_form_5 = core.BcForm().from_str('accm+2*ga | x-link: [l-bond-atom: accm-4C11 | l-displaced-atom: accm-4O13 | l-displaced-atom: accm-4H13 | r-bond-atom: ga(1)-1N5-1 | r-displaced-atom: ga(1)-1H5+1 | r-displaced-atom: ga(1)-1H5] | x-link: [l-bond-atom: ga(1)-2C8 | l-displaced-atom: ga(1)-2O10 | l-displaced-atom: ga(1)-2H10 | r-bond-atom: ga(2)-1N5-1 | r-displaced-atom: ga(2)-1H5+1 | r-displaced-atom: ga(2)-1H5]')
         self.assertTrue(len(bc_form_5.validate())==0)
         bc_form_5.set_subunit_attribute('accm', 'structure', bpforms.ProteinForm().from_str('ACCM'))
         bc_form_5.set_subunit_attribute('ga', 'structure', bpforms.ProteinForm().from_str('GA'))
-        self.assertEqual(OpenBabelUtils.export(bc_form_5.get_structure(), 'smiles', options=['canonical']), OpenBabelUtils.export(bpforms.ProteinForm().from_str('ACCMGAGA').get_structure()[0], 'smiles', options=['canonical']))
+        self.assertEqual(OpenBabelUtils.export(bc_form_5.get_structure()[0], 'smiles', options=['canonical']), OpenBabelUtils.export(bpforms.ProteinForm().from_str('ACCMGAGA').get_structure()[0], 'smiles', options=['canonical']))
 
         # mini "heterodimer" + small molecule AG+CH4
         bc_form_6 = core.BcForm().from_str('a+g+small | x-link: [l-bond-atom: a-1C8 | l-displaced-atom: a-1O10 | l-displaced-atom: a-1H10 | r-bond-atom: g-1N5-1 | r-displaced-atom: g-1H5+1 | r-displaced-atom: g-1H5] | x-link: [l-bond-atom: g-1C4 | l-displaced-atom: g-1H4 | r-bond-atom: small-1C1 | r-displaced-atom: small-1H1 ]')
@@ -892,11 +892,19 @@ def test_get_structure(self):
         conversion.SetInFormat('smi')
         conversion.ReadString(ob_mol, 'C')
         bc_form_6.set_subunit_attribute('small', 'structure', ob_mol)
-        self.assertEqual(OpenBabelUtils.export(bc_form_6.get_structure(), 'smiles', options=['canonical']), OpenBabelUtils.export(bpforms.ProteinForm().from_str('AA').get_structure()[0], 'smiles', options=['canonical']))
+        self.assertEqual(OpenBabelUtils.export(bc_form_6.get_structure()[0], 'smiles', options=['canonical']), OpenBabelUtils.export(bpforms.ProteinForm().from_str('AA').get_structure()[0], 'smiles', options=['canonical']))
 
     def test_export(self):
         bc_form_3 = core.BcForm().from_str('a+g | x-link: [l-bond-atom: a-1C8 | l-displaced-atom: a-1O10 | l-displaced-atom: a(1)-1H10 | r-bond-atom: g-1N5-1 | r-displaced-atom: g-1H5+1 | r-displaced-atom: g-1H5]')
         self.assertTrue(len(bc_form_3.validate())==0)
         bc_form_3.set_subunit_attribute('a', 'structure', bpforms.ProteinForm().from_str('A'))
         bc_form_3.set_subunit_attribute('g', 'structure', bpforms.ProteinForm().from_str('G'))
         self.assertEqual(bc_form_3.export(), 'C[C@H]([NH3+])C(=O)NCC(=O)O')
+
+class MiscellaneousTestCase(unittest.TestCase):
+
+    def test_draw_xlink(self):
+
+        xlinks = list(core.parse_yaml(core._xlink_filename).keys())
+        for xlink in xlinks:
+            img = core.draw_xlink(xlink)