Skip to content

Commit

Permalink
Merge pull request #41 from OBrink/explicite_hydrogen_inclusion
Browse files Browse the repository at this point in the history
Explicite hydrogen inclusion
  • Loading branch information
OBrink committed Jun 29, 2023
2 parents a1ba2a1 + e11c88b commit 32d541f
Show file tree
Hide file tree
Showing 9 changed files with 5,762 additions and 4,123 deletions.
2 changes: 1 addition & 1 deletion CITATION.cff
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ authors:
given-names: "Kohulan"
orcid: "https://orcid.org/0000-0003-1066-7792"
title: "RanDepict"
version: 1.1.7
version: 1.1.8
doi: 10.5281/zenodo.5205528
date-released: 2021-08-17
url: "https://github.com/OBrink/RanDepict"
Expand Down
2 changes: 1 addition & 1 deletion RanDepict/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
"""

__version__ = "1.1.7"
__version__ = "1.1.8"

__all__ = [
"RanDepict",
Expand Down
77 changes: 74 additions & 3 deletions RanDepict/cdk_functionalities.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,20 +59,29 @@ def cdk_depict(
depiction = self._cdk_render_molecule(molecule, has_R_group, shape)
return depiction

def _cdk_mol_block_to_cxsmiles(
    self,
    mol_block: str,
    ignore_explicite_hydrogens: bool = True,
) -> str:
    """
    This function takes a mol block str and returns the corresponding CXSMILES
    with coordinates using the CDK.

    Args:
        mol_block (str): mol block str
        ignore_explicite_hydrogens (bool, optional): If True, the explicit
            hydrogen atoms of the parsed structure are suppressed before the
            CXSMILES is generated. Defaults to True.

    Returns:
        str: CXSMILES
    """
    # Bound before the branch so that it is also available when
    # ignore_explicite_hydrogens is False.
    cdk_base = "org.openscience.cdk."
    atom_container = self._cdk_mol_block_to_iatomcontainer(mol_block)
    if ignore_explicite_hydrogens:
        manipulator = JClass(cdk_base + "tools.manipulator.AtomContainerManipulator")
        # The hydrogen-suppressed container returned by the CDK replaces
        # the parsed one from here on.
        atom_container = manipulator.copyAndSuppressedHydrogens(atom_container)
    smi_gen = JClass(cdk_base + "smiles.SmilesGenerator")
    flavor = JClass(cdk_base + "smiles.SmiFlavor")
    smi_gen = smi_gen(flavor.CxSmilesWithCoords)
    cxsmiles = smi_gen.create(atom_container)
    # Convert explicitly, like the other CDK helpers that return text.
    return str(cxsmiles)
Expand Down Expand Up @@ -134,6 +143,68 @@ def _cdk_iatomcontainer_to_mol_block(self, i_atom_container) -> str:
mol_str = string_writer.toString()
return str(mol_str)

def _cdk_add_explicite_hydrogen_to_molblock(self, mol_block: str) -> str:
    """
    Parse a mol block with the CDK, convert the implicit hydrogen atoms of the
    parsed structure to explicit ones and write the structure back to a mol
    block.

    Args:
        mol_block (str): mol block that describes a molecule

    Returns:
        str: mol block of the same molecule with explicit hydrogen atoms
    """
    molecule = self._cdk_mol_block_to_iatomcontainer(mol_block)
    cdk_package = "org.openscience.cdk."
    hydrogen_tool = JClass(
        cdk_package + "tools.manipulator.AtomContainerManipulator"
    )
    # The return value of the call is not used; the container that was
    # passed in is the one that gets serialised below.
    hydrogen_tool.convertImplicitToExplicitHydrogens(molecule)
    return self._cdk_iatomcontainer_to_mol_block(molecule)

def _cdk_add_explicite_hydrogen_to_smiles(self, smiles: str) -> str:
    """
    Use the CDK to convert the implicit hydrogen atoms of a molecule to
    explicit ones and return the resulting SMILES str.

    Args:
        smiles (str): SMILES representation of a molecule

    Returns:
        str: SMILES representation of the molecule with explicit hydrogen atoms
    """
    molecule = self._cdk_smiles_to_IAtomContainer(smiles)
    cdk_package = "org.openscience.cdk."
    hydrogen_tool = JClass(
        cdk_package + "tools.manipulator.AtomContainerManipulator"
    )
    # The return value of the call is not used; the container that was
    # passed in is the one handed to the SMILES generator below.
    hydrogen_tool.convertImplicitToExplicitHydrogens(molecule)
    # Generate the output SMILES with the "Absolute" flavour
    absolute_flavor = JClass("org.openscience.cdk.smiles.SmiFlavor").Absolute
    generator_cls = JClass("org.openscience.cdk.smiles.SmilesGenerator")
    explicit_h_smiles = generator_cls(absolute_flavor).create(molecule)
    return str(explicit_h_smiles)

def _cdk_remove_explicite_hydrogen_from_smiles(self, smiles: str) -> str:
    """
    This function takes a SMILES str and uses CDK to remove explicit hydrogen
    atoms. It returns an adapted version of the SMILES str.

    Args:
        smiles (str): SMILES representation of a molecule

    Returns:
        smiles (str): SMILES representation of the molecule without explicit
            hydrogen atoms
    """
    i_atom_container = self._cdk_smiles_to_IAtomContainer(smiles)
    cdk_base = "org.openscience.cdk."
    manipulator = JClass(cdk_base + "tools.manipulator.AtomContainerManipulator")
    # The hydrogen-suppressed container returned by the CDK replaces the
    # parsed one from here on.
    i_atom_container = manipulator.copyAndSuppressedHydrogens(i_atom_container)
    # Build all class paths from the same prefix
    smi_flavor = JClass(cdk_base + "smiles.SmiFlavor").Absolute
    smiles_generator = JClass(cdk_base + "smiles.SmilesGenerator")(smi_flavor)
    smiles = smiles_generator.create(i_atom_container)
    return str(smiles)

def _cdk_get_depiction_generator(self, molecule, has_R_group: bool = False):
"""
This function defines random rendering options for the structure
Expand Down
19 changes: 15 additions & 4 deletions RanDepict/randepict.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,8 +195,13 @@ def random_depiction(
Returns:
np.array: Chemical structure depiction
"""
orig_styles = self._config.styles
# TODO: add this to depiction feature fingerprint
if self.random_choice([True] + [False] * 5):
smiles = self._cdk_add_explicite_hydrogen_to_smiles(smiles)
self._config.styles = [style for style in orig_styles if style != 'pikachu']
depiction_functions = self.get_depiction_functions(smiles)

self._config.styles = orig_styles
for _ in range(3):
if len(depiction_functions) != 0:
# Pick random depiction function and call it
Expand Down Expand Up @@ -267,11 +272,17 @@ def random_depiction_with_coordinates(
orig_styles = self._config.styles
self._config.styles = [style for style in orig_styles if style != 'pikachu']
depiction_functions = self.get_depiction_functions(smiles)
fun = self.random_choice(depiction_functions)
self._config.styles = orig_styles
# TODO: add this to depiction feature fingerprint
if self.random_choice([True] + [False] * 5):
smiles = self._cdk_add_explicite_hydrogen_to_smiles(smiles)
mol_block = self._smiles_to_mol_block(smiles,
self.random_choice(['rdkit', 'indigo', 'cdk']))
cxsmiles = self._cdk_mol_block_to_cxsmiles(mol_block)
fun = self.random_choice(depiction_functions)
self.random_choice(['rdkit',
'indigo',
'cdk']))
cxsmiles = self._cdk_mol_block_to_cxsmiles(mol_block,
ignore_explicite_hydrogens=True)
depiction = fun(mol_block=mol_block, shape=shape)
if augment:
depiction = self.add_augmentations(depiction)
Expand Down
56 changes: 2 additions & 54 deletions RanDepict/random_markush_structure_generator.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
from jpype import JClass
# import sys
from typing import List
from .randepict import RandomDepictor

Expand Down Expand Up @@ -49,7 +47,7 @@ def insert_R_group_var(self, smiles: str, num: int) -> str:
Returns:
smiles (str): input SMILES with $num inserted R group variables
"""
smiles = self.add_explicite_hydrogen_to_smiles(smiles)
smiles = self.depictor._cdk_add_explicite_hydrogen_to_smiles(smiles)
potential_replacement_positions = self.get_valid_replacement_positions(smiles)
r_groups = []
# Replace C or H in SMILES with *
Expand All @@ -66,7 +64,7 @@ def insert_R_group_var(self, smiles: str, num: int) -> str:
break
# Remove the explicit hydrogen atoms again and get the absolute SMILES
smiles = "".join(smiles)
smiles = self.remove_explicite_hydrogen_from_smiles(smiles)
smiles = self.depictor._cdk_remove_explicite_hydrogen_from_smiles(smiles)
# Replace * with R groups
for r_group in r_groups:
smiles = smiles.replace("*", r_group, 1)
Expand Down Expand Up @@ -136,53 +134,3 @@ def get_valid_replacement_positions(self, smiles: str) -> List[int]:
]:
replacement_positions.append(index - 1)
return replacement_positions

def add_explicite_hydrogen_to_smiles(self, smiles: str) -> str:
    """
    Use the CDK to convert the implicit hydrogen atoms of a molecule to
    explicit ones and return the resulting SMILES str.

    Args:
        smiles (str): SMILES representation of a molecule

    Returns:
        str: SMILES representation of the molecule with explicit hydrogen atoms
    """
    molecule = self.depictor._cdk_smiles_to_IAtomContainer(smiles)

    # Make the hydrogen atoms explicit; the return value of the call is not
    # used, the container that was passed in is processed further.
    cdk_package = "org.openscience.cdk."
    hydrogen_tool = JClass(
        cdk_package + "tools.manipulator.AtomContainerManipulator"
    )
    hydrogen_tool.convertImplicitToExplicitHydrogens(molecule)

    # Write the container out with the "Absolute" SMILES flavour
    absolute_flavor = JClass("org.openscience.cdk.smiles.SmiFlavor").Absolute
    generator_cls = JClass("org.openscience.cdk.smiles.SmilesGenerator")
    explicit_h_smiles = generator_cls(absolute_flavor).create(molecule)
    return str(explicit_h_smiles)

def remove_explicite_hydrogen_from_smiles(self, smiles: str) -> str:
    """
    This function takes a SMILES str and uses CDK to remove explicit hydrogen
    atoms. It returns an adapted version of the SMILES str.

    Args:
        smiles (str): SMILES representation of a molecule

    Returns:
        smiles (str): SMILES representation of the molecule without explicit
            hydrogen atoms
    """
    i_atom_container = self.depictor._cdk_smiles_to_IAtomContainer(smiles)
    # Remove explicit hydrogen atoms; the hydrogen-suppressed container
    # returned by the CDK replaces the parsed one from here on.
    cdk_base = "org.openscience.cdk."
    manipulator = JClass(cdk_base + "tools.manipulator.AtomContainerManipulator")
    i_atom_container = manipulator.copyAndSuppressedHydrogens(i_atom_container)
    # Create absolute SMILES (all class paths are built from the same prefix)
    smi_flavor = JClass(cdk_base + "smiles.SmiFlavor").Absolute
    smiles_generator = JClass(cdk_base + "smiles.SmilesGenerator")(smi_flavor)
    smiles = smiles_generator.create(i_atom_container)
    return str(smiles)
27 changes: 14 additions & 13 deletions Tests/test_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -412,6 +412,20 @@ def test_get_depiction_functions_normal(self):
difference = set(observed) ^ set(expected)
assert not difference

def test_add_explicite_hydrogen_to_smiles(self):
    # Propane in, propane with all eight hydrogen atoms written out expected
    add_hydrogens = self.depictor._cdk_add_explicite_hydrogen_to_smiles
    propane_with_h = "C([H])([H])([H])C([H])([H])C([H])([H])[H]"
    result = add_hydrogens("CCC")
    assert propane_with_h == result

def test_remove_explicite_hydrogen_to_smiles(self):
    # Propane with explicit hydrogen atoms in, plain propane expected
    remove_hydrogens = self.depictor._cdk_remove_explicite_hydrogen_from_smiles
    propane_with_h = "C([H])([H])([H])C([H])([H])C([H])([H])[H]"
    result = remove_hydrogens(propane_with_h)
    assert "CCC" == result

def test_get_depiction_functions_isotopes(self):
# PIKAChU can't handle isotopes
observed = self.depictor.get_depiction_functions("[13CH3]N1C=NC2=C1C(=O)N(C(=O)N2C)C")
Expand Down Expand Up @@ -516,19 +530,6 @@ def test_insert_R_group_var_can_be_depicted(self):
depiction = self.depictor.random_depiction(output_smiles)
assert type(depiction) == np.ndarray

def test_add_explicite_hydrogen_to_smiles(self):
    # Propane in, propane with all eight hydrogen atoms written out expected
    add_hydrogens = self.markush_creator.add_explicite_hydrogen_to_smiles
    propane_with_h = "C([H])([H])([H])C([H])([H])C([H])([H])[H]"
    result = add_hydrogens("CCC")
    assert propane_with_h == result

def test_remove_explicite_hydrogen_to_smiles(self):
    # Propane with explicit hydrogen atoms in, plain propane expected
    remove_hydrogens = self.markush_creator.remove_explicite_hydrogen_from_smiles
    propane_with_h = "C([H])([H])([H])C([H])([H])C([H])([H])[H]"
    result = remove_hydrogens(propane_with_h)
    assert "CCC" == result

def test_get_valid_replacement_positions_simple_chain(self):
# Simple example case
Expand Down

0 comments on commit 32d541f

Please sign in to comment.