Skip to content

Commit

Permalink
Smiles parser setting to preserve aromaticity as provided in the Smil…
Browse files Browse the repository at this point in the history
…es String itself.

Signed-off-by: Egon Willighagen <egonw@users.sourceforge.net>
  • Loading branch information
Mark Rynbeek authored and egonw committed Sep 7, 2010
1 parent 91c58dd commit ae21ee2
Show file tree
Hide file tree
Showing 2 changed files with 123 additions and 36 deletions.
111 changes: 75 additions & 36 deletions src/main/org/openscience/cdk/smiles/SmilesParser.java
Expand Up @@ -100,10 +100,18 @@ public class SmilesParser {
private int status = 0;
protected IChemObjectBuilder builder;

private enum Chirality {

/** Winding direction of a chiral centre, named after the '@' / '@@' marks noted on each constant. */
private enum Chirality {
ANTI_CLOCKWISE, // written as a single '@'
CLOCKWISE // written as a double '@@'
}

/*
* Boolean to preserve aromaticity as provided in the Smiles itself (through lowercase letters (c1cccc1) or colons).
* Setting this to true means that CDK will not do aromaticity detection, nor atom typing (as this may conflict
* with the preserved aromaticity).
*/
private boolean preservingAromaticity=false;

/**
* Constructor for the SmilesParser object.
Expand Down Expand Up @@ -245,21 +253,32 @@ public IMolecule parseSmiles(String smiles) throws InvalidSmilesException {
molecule.addStereoElement(l4Chiral);
}

// perceive atom types
CDKAtomTypeMatcher matcher = CDKAtomTypeMatcher.getInstance(molecule.getBuilder());
int i = 0;
for (IAtom atom : molecule.atoms()) {
i++;
try {
IAtomType type = matcher.findMatchingAtomType(molecule, atom);
AtomTypeManipulator.configure(atom, type);
} catch (Exception e) {
System.out.println("Cannot percieve atom type for the " + i + "th atom: " + atom.getSymbol());
atom.setAtomTypeName("X");
if (!preservingAromaticity ) {
// perceive atom types
CDKAtomTypeMatcher matcher = CDKAtomTypeMatcher.getInstance(molecule.getBuilder());
int i = 0;
for (IAtom atom : molecule.atoms()) {
i++;
try {
IAtomType type = matcher.findMatchingAtomType(molecule, atom);
AtomTypeManipulator.configure(atom, type);
} catch (Exception e) {
System.out.println("Cannot percieve atom type for the " + i + "th atom: " + atom.getSymbol());
atom.setAtomTypeName("X");
}
}
this.addImplicitHydrogens(molecule);
this.perceiveAromaticity(molecule);
}
else {
for (IBond bond : molecule.bonds() ) {
if(!bond.getFlag(CDKConstants.ISAROMATIC) &&
bond.getAtom(0).getFlag(CDKConstants.ISAROMATIC) &&
bond.getAtom(1).getFlag(CDKConstants.ISAROMATIC)) {
bond.setFlag(CDKConstants.ISAROMATIC,true);
}
}
}
this.addImplicitHydrogens(molecule);
this.perceiveAromaticity(molecule);

return molecule;
}
Expand Down Expand Up @@ -334,6 +353,9 @@ private IMolecule parseString(String smiles) throws InvalidSmilesException
currentSymbol = currentSymbol.toUpperCase();
atom = builder.newInstance(IAtom.class,currentSymbol);
atom.setHybridization(Hybridization.SP2);
if (preservingAromaticity ) {
atom.setFlag(CDKConstants.ISAROMATIC, true);
}
} else
{
atom = builder.newInstance(IAtom.class,currentSymbol);
Expand All @@ -357,8 +379,12 @@ private IMolecule parseString(String smiles) throws InvalidSmilesException
{
logger.debug("Creating bond between ", atom.getSymbol(), " and ", lastNode.getSymbol());
bond = builder.newInstance(IBond.class,atom, lastNode, bondStatus);
if (bondIsAromatic) {
if (bondIsAromatic) {
bond.setFlag(CDKConstants.ISAROMATIC, true);
if (preservingAromaticity) {
bond.getAtom(0).setFlag(CDKConstants.ISAROMATIC, true);
bond.getAtom(1).setFlag(CDKConstants.ISAROMATIC, true);
}
}
molecule.addBond(bond);
}
Expand Down Expand Up @@ -751,27 +777,23 @@ private IAtom assembleAtom(String s, IAtom lastNode, boolean bondExists) throws
{
logger.debug("Found element symbol: ", currentSymbol);
position = position + currentSymbol.length();
if (currentSymbol.length() == 1)
{
if (!(currentSymbol.toUpperCase()).equals(currentSymbol))
{
currentSymbol = currentSymbol.toUpperCase();
atom = builder.newInstance(IAtom.class,currentSymbol);
atom.setHybridization(Hybridization.SP2);

Integer hcount = atom.getImplicitHydrogenCount() == CDKConstants.UNSET ? 0 : atom.getImplicitHydrogenCount();
if (hcount > 0)
{
atom.setImplicitHydrogenCount(hcount - 1);
}
} else
{
atom = builder.newInstance(IAtom.class,currentSymbol);
}
} else
{
atom = builder.newInstance(IAtom.class,currentSymbol);
}
if(Character.isLowerCase(mychar)) {
if (currentSymbol.length() == 1) {
currentSymbol = currentSymbol.toUpperCase();
}
atom = builder.newInstance(IAtom.class,currentSymbol);
atom.setHybridization(Hybridization.SP2);
Integer hcount = atom.getImplicitHydrogenCount() == CDKConstants.UNSET ? 0 : atom.getImplicitHydrogenCount();
if (hcount > 0) {
atom.setImplicitHydrogenCount(hcount - 1);
}
if (preservingAromaticity )
atom.setFlag(CDKConstants.ISAROMATIC, true);
}
else
{
atom = builder.newInstance(IAtom.class,currentSymbol);
}
logger.debug("Made atom: ", atom);
}
break;
Expand Down Expand Up @@ -988,6 +1010,23 @@ private void perceiveAromaticity(IMolecule m) {
}
}
}

/**
 * Sets whether aromaticity is taken from the SMILES string as written
 * (lowercase atom symbols or ':' bonds) instead of being detected by the parser.
 * The default is false. When set to true, atoms are not typed by the parser,
 * because atom typing may conflict with the preserved aromaticity.
 *
 * @param preservingAromaticity true to preserve the aromaticity given in the SMILES string,
 *                              false to let the parser detect it.
 */
public void setPreservingAromaticity(boolean preservingAromaticity) {
this.preservingAromaticity = preservingAromaticity;
}

/**
 * Tells whether this parser preserves aromaticity as written in the SMILES string
 * rather than detecting it. The setting is false by default.
 *
 * @return true if aromaticity from the SMILES string is preserved, false otherwise.
 */
public boolean isPreservingAromaticity() {
return preservingAromaticity;
}

}

48 changes: 48 additions & 0 deletions src/test/org/openscience/cdk/smiles/SmilesParserTest.java
Expand Up @@ -2269,5 +2269,53 @@ public void testAromaticity() throws InvalidSmilesException{
}
}
}

/**
 * Checks that, with aromaticity preservation switched on, the parser flags exactly
 * the atoms and bonds that the SMILES string itself marks as aromatic.
 */
@Test public void testPreserveAromaticity() throws InvalidSmilesException {
    SmilesParser preservingParser = new SmilesParser(DefaultChemObjectBuilder.getInstance());
    preservingParser.setPreservingAromaticity(true);

    // lowercase (aromatic) atoms mixed with uppercase ones
    expectAromaticCounts(preservingParser, "Oc1ccc(Cl)c2C(=O)c3c(sc4nccn34)C(=O)c12", 14, 15);

    // includes a bracketed lowercase atom, [nH]
    expectAromaticCounts(preservingParser, "COc1ccc2[nH]c3c(cnn4c(C)nnc34)c2c1", 16, 19);

    // uppercase atoms joined by ':' bonds
    expectAromaticCounts(preservingParser, "C:1C:C:C:C:C1", 6, 6);

    // bracketed two-letter lowercase symbol, [se]
    expectAromaticCounts(preservingParser, "c1cc[se]cc1", 6, 6);
}

/**
 * Parses a SMILES string and asserts the number of aromatic atoms and bonds in the result.
 *
 * @param parser        parser used to read the SMILES string.
 * @param smiles        SMILES string to parse.
 * @param aromaticAtoms expected number of atoms carrying the aromaticity flag.
 * @param aromaticBonds expected number of bonds carrying the aromaticity flag.
 * @throws InvalidSmilesException if the parser rejects the SMILES string.
 */
private void expectAromaticCounts(SmilesParser parser, String smiles,
                                  int aromaticAtoms, int aromaticBonds) throws InvalidSmilesException {
    IMolecule parsed = parser.parseSmiles(smiles);
    Assert.assertEquals(aromaticAtoms, countAromaticAtoms(parsed));
    Assert.assertEquals(aromaticBonds, countAromaticBonds(parsed));
}

/**
 * Tallies the atoms of a molecule that carry the aromaticity flag.
 *
 * @param mol molecule whose atoms are inspected.
 * @return number of atoms flagged with {@code CDKConstants.ISAROMATIC}.
 */
private int countAromaticAtoms(IMolecule mol) {
    int flaggedAtoms = 0;
    for (IAtom candidate : mol.atoms()) {
        if (!candidate.getFlag(CDKConstants.ISAROMATIC)) {
            continue;
        }
        flaggedAtoms += 1;
    }
    return flaggedAtoms;
}

/**
 * Tallies the bonds of a molecule that carry the aromaticity flag.
 *
 * @param mol molecule whose bonds are inspected.
 * @return number of bonds flagged with {@code CDKConstants.ISAROMATIC}.
 */
private int countAromaticBonds(IMolecule mol) {
    int flaggedBonds = 0;
    for (IBond candidate : mol.bonds()) {
        if (!candidate.getFlag(CDKConstants.ISAROMATIC)) {
            continue;
        }
        flaggedBonds += 1;
    }
    return flaggedBonds;
}

}

0 comments on commit ae21ee2

Please sign in to comment.