diff --git a/.classpath b/.classpath index 6e9cd8319a6..f76fdf9cb1d 100644 --- a/.classpath +++ b/.classpath @@ -20,7 +20,7 @@ - + diff --git a/AUTHORS b/AUTHORS index 699a83f7ae9..8ab807877b4 100644 --- a/AUTHORS +++ b/AUTHORS @@ -32,6 +32,7 @@ Niels Out Jerome Pansanel Chris Pudney Jonathan Rienstra-Kiracofe +Mark Rijnbeek David Robinson Miguel Rojas Cherto Bhupinder Sandhu diff --git a/src/META-INF/io.cdkdepends b/src/META-INF/io.cdkdepends index 2cb982fe0b1..33f3a7e9ca0 100644 --- a/src/META-INF/io.cdkdepends +++ b/src/META-INF/io.cdkdepends @@ -4,3 +4,5 @@ cdk-ioformats.jar cdk-core.jar cdk-standard.jar cdk-atomtype.jar +cdk-isomorphism.jar +cdk-data.jar diff --git a/src/META-INF/test-io.cdkdepends b/src/META-INF/test-io.cdkdepends index 1eaca8616d5..f47cfa4e3e6 100644 --- a/src/META-INF/test-io.cdkdepends +++ b/src/META-INF/test-io.cdkdepends @@ -19,3 +19,4 @@ cdk-smiles.jar cdk-nonotify.jar cdk-test.jar cdk-testdata.jar +cdk-isomorphism.jar diff --git a/src/main/org/openscience/cdk/CDKConstants.java b/src/main/org/openscience/cdk/CDKConstants.java index 6d408cd8436..415a5e94084 100644 --- a/src/main/org/openscience/cdk/CDKConstants.java +++ b/src/main/org/openscience/cdk/CDKConstants.java @@ -288,6 +288,14 @@ public class CDKConstants { */ public static final String ISOTROPIC_SHIELDING = "cdk:IsotropicShielding"; + /** + * A property to indicate RestH being true or false. 
RestH is a term + * used in RGroup queries: "if this property is applied ('on'), sites labeled + * with Rgroup rrr may only be substituted with a member of the Rgroup or with H" + */ + public static final String REST_H = "cdk:RestH"; + + /**************************************** * Some predefined property names for * * AtomTypes * diff --git a/src/main/org/openscience/cdk/io/RGroupQueryReader.java b/src/main/org/openscience/cdk/io/RGroupQueryReader.java new file mode 100644 index 00000000000..32fc2ff431d --- /dev/null +++ b/src/main/org/openscience/cdk/io/RGroupQueryReader.java @@ -0,0 +1,473 @@ +/* + * Copyright (C) 2010 Mark Rijnbeek + * + * Contact: cdk-devel@lists.sourceforge.net + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * All we ask is that proper credit is given for our work, which includes + * - but is not limited to - adding the above copyright notice to the beginning + * of your source code files, and to any copyright notice that you may + * distribute with programs based on this work. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ *
+ */
+package org.openscience.cdk.io;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.Reader;
+import java.io.StringReader;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.StringTokenizer;
+
+import org.openscience.cdk.annotations.TestClass;
+import org.openscience.cdk.annotations.TestMethod;
+import org.openscience.cdk.exception.CDKException;
+import org.openscience.cdk.interfaces.IAtom;
+import org.openscience.cdk.interfaces.IBond;
+import org.openscience.cdk.interfaces.IChemObject;
+import org.openscience.cdk.interfaces.IMolecule;
+import org.openscience.cdk.interfaces.IPseudoAtom;
+import org.openscience.cdk.io.formats.IResourceFormat;
+import org.openscience.cdk.io.formats.RGroupQueryFormat;
+import org.openscience.cdk.isomorphism.matchers.IRGroupQuery;
+import org.openscience.cdk.isomorphism.matchers.RGroup;
+import org.openscience.cdk.isomorphism.matchers.RGroupList;
+import org.openscience.cdk.isomorphism.matchers.RGroupQuery;
+import org.openscience.cdk.tools.ILoggingTool;
+import org.openscience.cdk.tools.LoggingToolFactory;
+
+/**
+ * A reader for Symyx' Rgroup files (RGFiles).
+ * An RGfile describes a single molecular query with Rgroups.
+ * Each RGfile is a combination of Ctabs defining the root molecule and each
+ * member of each Rgroup in the query.
+ *
+ * <p>The RGFile format is described in the manual
+ * "CTFile Formats", Chapter 5.
+ *
+ * <p>The individual <code>$CTAB</code> blocks are handed to
+ * {@link org.openscience.cdk.io.MDLV2000Reader}; this class only deals with
+ * the RGFile envelope and with the Rgroup specific property lines
+ * (<code>M  LOG</code>, <code>M  AAL</code>, <code>M  APO</code>).
+ *
+ * @cdk.module io
+ * @cdk.githash
+ *
+ * @cdk.keyword Rgroup
+ * @cdk.keyword R group
+ * @cdk.keyword R-group
+ * @author Mark Rijnbeek
+ */
+@TestClass("org.openscience.cdk.io.RGroupQueryReaderTest")
+public class RGroupQueryReader extends DefaultChemObjectReader {
+
+    /**
+     * Private bean style class to capture LOG (logic) lines.
+     * Declared static because it never touches the enclosing reader.
+     */
+    private static class RGroupLogic {
+        int rgoupNumberRequired;
+        boolean restH;
+        String occurence;
+    }
+
+    BufferedReader input = null;
+    private static ILoggingTool logger = LoggingToolFactory.createLoggingTool(RGroupQueryReader.class);
+
+    /**
+     * Default constructor, input not set (an empty reader is installed).
+     */
+    public RGroupQueryReader() {
+        this(new StringReader(""));
+    }
+
+    /**
+     * Constructs a new RgroupQueryReader that can read RgroupAtomContainerSet
+     * from a given InputStream.
+     * @param in The InputStream to read from.
+     */
+    public RGroupQueryReader(InputStream in) {
+        this(new InputStreamReader(in));
+    }
+
+    /**
+     * Constructs a new RgroupQueryReader that can read RgroupAtomContainerSet
+     * from a given Reader.
+     * @param in The Reader to read from.
+     */
+    public RGroupQueryReader(Reader in) {
+        input = new BufferedReader(in);
+    }
+
+
+    /**
+     * Sets the input Reader. A BufferedReader is used as-is, anything else
+     * is wrapped in one.
+     * @param input Reader object
+     * @throws CDKException declared by the reader contract; not thrown here
+     */
+    @TestMethod("testSetReader_Reader")
+    public void setReader(Reader input) throws CDKException {
+        if (input instanceof BufferedReader) {
+            this.input = (BufferedReader)input;
+        } else {
+            this.input = new BufferedReader(input);
+        }
+    }
+
+    /**
+     * Sets the input stream, wrapping it in an InputStreamReader.
+     * @param input stream to read the RGFile from
+     * @throws CDKException declared by the reader contract; not thrown here
+     */
+    @TestMethod("testSetReader_InputStream")
+    public void setReader(InputStream input) throws CDKException {
+        setReader(new InputStreamReader(input));
+    }
+
+    /**
+     * @return the shared {@link RGroupQueryFormat} instance
+     */
+    @TestMethod("testGetFormat")
+    public IResourceFormat getFormat() {
+        return RGroupQueryFormat.getInstance();
+    }
+
+    /**
+     * Tells whether objects of the given class can be read: true when the
+     * class, or one of its superclasses, directly implements
+     * {@link IRGroupQuery}.
+     * @param classObject class to test
+     * @return true if the class is accepted
+     */
+    @TestMethod("testAccepts")
+    public boolean accepts(Class classObject) {
+        Class[] interfaces = classObject.getInterfaces();
+        for (Class anInterface : interfaces) {
+            if (IRGroupQuery.class.equals(anInterface)) {
+                return true;
+            }
+        }
+        Class superClass = classObject.getSuperclass();
+        if (superClass != null) {
+            return this.accepts(superClass);
+        }
+        return false;
+    }
+
+    /**
+     * Closes the underlying reader.
+     */
+    @TestMethod("testClose")
+    public void close() throws IOException {
+        input.close();
+    }
+
+    /**
+     * Check input IChemObject and proceed to parse.
+     * Accepts/returns IChemObject of type RGroupQuery only.
+     * @return IChemObject read from file
+     * @param object class must be of type RGroupQuery
+     * @throws CDKException if the object is not an RGroupQuery or the
+     *         input cannot be parsed
+     */
+    public IChemObject read(IChemObject object) throws CDKException {
+        if (object instanceof RGroupQuery) {
+            return parseRGFile((RGroupQuery)object);
+        }
+        throw new CDKException
+            ("Reader only supports "+RGroupQuery.class.getName()+" objects");
+    }
+
+
+    /**
+     * Parse the RGFile. Makes use of {@link org.openscience.cdk.io.MDLV2000Reader}
+     * to parse individual $CTAB blocks.
+     *
+     * @param rGroupQuery empty
+     * @return populated query
+     * @throws CDKException on any structural problem in the file; other
+     *         exceptions are wrapped in a CDKException that carries the
+     *         line number and the last line read
+     */
+    private RGroupQuery parseRGFile(RGroupQuery rGroupQuery) throws CDKException {
+        String line = "";
+        int lineCount = 0;
+        String eol = System.getProperty("line.separator");
+        /* Captures the LOG line(s), keyed by Rgroup number. */
+        Map<Integer, RGroupLogic> logicDefinitions = new HashMap<Integer, RGroupLogic>();
+
+        /* Captures the attachment order for Rgroups.
+         * Contains:
+         * - pseudo atom (Rgroup)
+         * - map with (integer,bond) meaning "bond" has attachment
+         *   order "integer" (1,2,3) for the Rgroup
+         * The order is based on the atom block, unless there is an AAL line
+         * for the pseudo atom.
+         */
+        Map<IAtom, Map<Integer, IBond>> attachmentPoints = new HashMap<IAtom, Map<Integer, IBond>>();
+
+        try {
+            // Process the Header block_________________________________________
+            //__________________________________________________________________
+            logger.info("Process the Header block");
+            checkLineBeginsWith(input.readLine(), "$MDL", ++lineCount);
+            checkLineBeginsWith(input.readLine(), "$MOL", ++lineCount);
+            checkLineBeginsWith(input.readLine(), "$HDR", ++lineCount);
+
+            for (int i = 1; i <= 3; i++) {
+                lineCount++;
+                if (input.readLine() == null) {
+                    throw new CDKException("RGFile invalid, empty/null header line at #" + lineCount);
+                }
+                //optional: parse header info here (not implemented)
+            }
+            checkLineBeginsWith(input.readLine(), "$END HDR", ++lineCount);
+
+
+            //Process the root structure (scaffold)_____________________________
+            //__________________________________________________________________
+            logger.info("Process the root structure (scaffold)");
+            checkLineBeginsWith(input.readLine(), "$CTAB", ++lineCount);
+            //Force header
+            StringBuilder sb = new StringBuilder(RGroup.ROOT_LABEL+"\n\n\n");
+            line = input.readLine();
+            ++lineCount;
+            // startsWith (not equals) so that the root block ends the same
+            // way the Rgroup blocks below do, tolerating trailing blanks.
+            while (line != null && !line.startsWith("$END CTAB")) {
+                sb.append(line).append(eol);
+
+                //LOG lines: Logic, Unsatisfied Sites, Range of Occurrence.
+                if (line.startsWith("M  LOG")) {
+                    parseLogLine(line, logicDefinitions);
+                }
+
+                line = input.readLine();
+                ++lineCount;
+            }
+            String rootStr = sb.toString();
+
+            //Let MDL reader process $CTAB block of the root structure.
+            MDLV2000Reader reader = new MDLV2000Reader(new StringReader(rootStr), ISimpleChemObjectReader.Mode.STRICT);
+            IMolecule root = (IMolecule)reader.read(rGroupQuery.getBuilder().newMolecule());
+            rGroupQuery.setRootStructure(root);
+            // NOTE(review): indexed below with the 1-based atom numbers taken
+            // from the file, so index 0 is presumably a placeholder - confirm
+            // against MDLV2000Reader.getAtomsByLinePosition().
+            List<IAtom> atomsByLinePosition = reader.getAtomsByLinePosition();
+
+            //Atom attachment order: explicit AAL lines first, then the rest
+            parseAalLines(rootStr, eol, root, atomsByLinePosition, attachmentPoints);
+            addDefaultAttachmentPoints(root, atomsByLinePosition, attachmentPoints);
+            rGroupQuery.setRootAttachmentPoints(attachmentPoints);
+            logger.info("Attachm.points defined for " + attachmentPoints.size() + " R# atoms");
+
+
+            //Process each Rgroup's $CTAB block(s)_____________________________
+            //__________________________________________________________________
+
+            //Set up the RgroupLists, one for each unique R# (# = 1..32 max)
+            Map<Integer, RGroupList> rGroupDefinitions = createRGroupLists(root, logicDefinitions);
+
+            //Parse all $CTAB blocks per Rgroup (there can be more than one)
+            line = input.readLine();
+            ++lineCount;
+            boolean hasMoreRGP = true;
+            while (hasMoreRGP) {
+
+                checkLineBeginsWith(line, "$RGP", lineCount);
+                line = input.readLine();
+                ++lineCount;
+                checkLineNotNull(line, lineCount);
+                logger.info("line for num is " + line);
+                int rgroupNum = Integer.parseInt(line.trim());
+                line = input.readLine();
+                ++lineCount;
+
+                boolean hasMoreCTAB = true;
+                while (hasMoreCTAB) {
+
+                    checkLineBeginsWith(line, "$CTAB", lineCount);
+                    sb = new StringBuilder(RGroup.makeLabel(rgroupNum)+"\n\n\n");
+                    line = input.readLine();
+                    ++lineCount; // keeps reported line numbers in step with the input
+                    while (line != null && !line.startsWith("$END CTAB")) {
+                        sb.append(line).append(eol);
+                        line = input.readLine();
+                        ++lineCount;
+                    }
+                    String groupStr = sb.toString();
+                    reader = new MDLV2000Reader
+                        (new StringReader(groupStr), ISimpleChemObjectReader.Mode.STRICT);
+                    IMolecule group = (IMolecule)reader.read(rGroupQuery.getBuilder().newMolecule());
+                    atomsByLinePosition = reader.getAtomsByLinePosition();
+                    RGroup rGroup = new RGroup();
+                    rGroup.setGroup(group);
+
+                    //Parse the Rgroup's attachment points (APO)
+                    parseApoLines(groupStr, eol, atomsByLinePosition, rGroup);
+
+                    RGroupList rList = rGroupDefinitions.get(rgroupNum);
+                    if (rList==null) {
+                        throw new CDKException("R"+rgroupNum+" not defined but referenced in $RGP.");
+                    }
+                    rList.getRGroups().add(rGroup);
+
+                    line = input.readLine();
+                    ++lineCount;
+                    checkLineNotNull(line, lineCount);
+                    if (line.startsWith("$END RGP")) {
+                        logger.info("end of RGP block");
+                        hasMoreCTAB = false;
+                    }
+                }
+
+                line = input.readLine();
+                ++lineCount;
+                checkLineNotNull(line, lineCount);
+                if (line.startsWith("$END MOL")) {
+                    hasMoreRGP = false;
+                }
+            }
+
+            rGroupQuery.setRGroupDefinitions(rGroupDefinitions);
+            logger.info("Number of lines was " + lineCount);
+            return rGroupQuery;
+
+        } catch (CDKException exception) {
+            String error = "CDK Error while parsing line " + lineCount + ": " + line + " -> " + exception.getMessage();
+            logger.error(error);
+            logger.debug(exception);
+            throw exception;
+        } catch (Exception exception) {
+            // Boundary catch: anything unexpected (number format, missing
+            // token, ...) is reported as a CDKException with its cause kept.
+            String error =
+                exception.getClass() + ": Error while parsing line " + lineCount + ": " + line + " -> " + exception.getMessage();
+            logger.error(error);
+            logger.debug(exception);
+            throw new CDKException(error, exception);
+        }
+    }
+
+    /**
+     * Parses one <code>M  LOG</code> line and stores the result under the
+     * Rgroup number found on the line.
+     * Token layout: M, LOG, entry count, Rgroup number, required Rgroup
+     * number, RestH flag (1 = on), then the occurrence expression; the
+     * remaining tokens are concatenated without separator.
+     * @param line the LOG line
+     * @param logicDefinitions receives the parsed logic
+     */
+    private void parseLogLine(String line, Map<Integer, RGroupLogic> logicDefinitions) {
+        StringTokenizer tokens = new StringTokenizer(line);
+        tokens.nextToken(); // "M"
+        tokens.nextToken(); // "LOG"
+        tokens.nextToken(); // entry count, not used
+
+        int rgroupNumber = Integer.parseInt(tokens.nextToken());
+        RGroupLogic log = new RGroupLogic();
+        log.rgoupNumberRequired = Integer.parseInt(tokens.nextToken());
+        log.restH = tokens.nextToken().equals("1");
+        StringBuilder occurrence = new StringBuilder();
+        while (tokens.hasMoreTokens()) {
+            occurrence.append(tokens.nextToken());
+        }
+        log.occurence = occurrence.toString();
+        logicDefinitions.put(rgroupNumber, log);
+    }
+
+    /**
+     * Reads the explicit attachment orders from the <code>M  AAL</code>
+     * lines of the root block.
+     * Token layout: M, AAL, position of the R pseudo atom, entry count,
+     * then (partner atom position, attachment order) pairs.
+     * @param rootStr the root CTAB text
+     * @param eol line separator characters used to split rootStr
+     * @param root the parsed root structure
+     * @param atomsByLinePosition atoms of the root, by position in the atom block
+     * @param attachmentPoints receives one bond map per R pseudo atom
+     */
+    private void parseAalLines(String rootStr, String eol, IMolecule root,
+            List<IAtom> atomsByLinePosition,
+            Map<IAtom, Map<Integer, IBond>> attachmentPoints) {
+        StringTokenizer lines = new StringTokenizer(rootStr, eol);
+        while (lines.hasMoreTokens()) {
+            String aalLine = lines.nextToken();
+            if (!aalLine.startsWith("M  AAL")) {
+                continue;
+            }
+            StringTokenizer stAAL = new StringTokenizer(aalLine);
+            stAAL.nextToken(); // "M"
+            stAAL.nextToken(); // "AAL"
+            IAtom rGroup = atomsByLinePosition.get(Integer.parseInt(stAAL.nextToken()));
+            stAAL.nextToken(); // entry count, not used
+            Map<Integer, IBond> bondMap = new HashMap<Integer, IBond>();
+            while (stAAL.hasMoreTokens()) {
+                IAtom partner = atomsByLinePosition.get(Integer.parseInt(stAAL.nextToken()));
+                IBond bond = root.getBond(rGroup, partner);
+                int order = Integer.parseInt(stAAL.nextToken());
+                bondMap.put(order, bond);
+                logger.info("AAL " + order + " " + ((IPseudoAtom)rGroup).getLabel() +
+                    "-" + partner.getSymbol());
+            }
+            if (bondMap.size()!=0) {
+                attachmentPoints.put(rGroup, bondMap);
+            }
+        }
+    }
+
+    /**
+     * Deals with the remaining attachment points (non AAL): for every
+     * numbered R pseudo atom that has no entry yet, the bonds are numbered
+     * in the order in which the bonded atoms appear in the atom block.
+     * @param root the parsed root structure
+     * @param atomsByLinePosition atoms of the root, by position in the atom block
+     * @param attachmentPoints receives one bond map per R pseudo atom
+     */
+    private void addDefaultAttachmentPoints(IMolecule root, List<IAtom> atomsByLinePosition,
+            Map<IAtom, Map<Integer, IBond>> attachmentPoints) {
+        for (IAtom atom : root.atoms()) {
+            if (!(atom instanceof IPseudoAtom)) {
+                continue;
+            }
+            IPseudoAtom rGroup = (IPseudoAtom)atom;
+            String label = rGroup.getLabel();
+            boolean numbered = label.startsWith("R") && !label.equals("R"); // only numbered ones
+            if (!numbered || attachmentPoints.containsKey(rGroup)) {
+                continue;
+            }
+            //Order reflects the order of atoms in the Atom Block
+            int order = 0;
+            Map<Integer, IBond> bondMap = new HashMap<Integer, IBond>();
+            for (IAtom atom2 : atomsByLinePosition) {
+                if (atom.equals(atom2)) {
+                    continue;
+                }
+                for (IBond bond : root.bonds()) {
+                    if (bond.contains(atom) && bond.contains(atom2)) {
+                        bondMap.put(++order, bond);
+                        logger.info("Def " + order + " " + rGroup.getLabel() + "-" + atom2.getSymbol());
+                        break;
+                    }
+                }
+            }
+            if (bondMap.size()!=0) {
+                attachmentPoints.put(rGroup, bondMap);
+            }
+        }
+    }
+
+    /**
+     * Sets up one (still empty) RGroupList for each distinct valid R# label
+     * in the root. Logic comes from the LOG line of that Rgroup when there
+     * is one; otherwise RestH is off, occurrence is "&gt;0" and no other
+     * Rgroup is required.
+     * @param root the parsed root structure
+     * @param logicDefinitions parsed LOG lines by Rgroup number
+     * @return Rgroup lists by Rgroup number
+     * @throws CDKException if an RGroupList rejects a value
+     */
+    private Map<Integer, RGroupList> createRGroupLists(IMolecule root,
+            Map<Integer, RGroupLogic> logicDefinitions) throws CDKException {
+        Map<Integer, RGroupList> rGroupDefinitions = new HashMap<Integer, RGroupList>();
+        for (IAtom atom : root.atoms()) {
+            if (!(atom instanceof IPseudoAtom)) {
+                continue;
+            }
+            IPseudoAtom rGroup = (IPseudoAtom)atom;
+            if (!RGroupQuery.isValidRgroupQueryLabel(rGroup.getLabel())) {
+                continue;
+            }
+            int rgroupNum = Integer.parseInt(rGroup.getLabel().substring(1));
+            if (rGroupDefinitions.containsKey(rgroupNum)) {
+                continue;
+            }
+            logger.info("Define Rgroup R" + rgroupNum);
+            RGroupList rgroupList = new RGroupList(rgroupNum);
+            RGroupLogic logic = logicDefinitions.get(rgroupNum);
+            if (logic != null) {
+                rgroupList.setRestH(logic.restH);
+                rgroupList.setOccurrence(logic.occurence);
+                rgroupList.setRequiredRGroupNumber(logic.rgoupNumberRequired);
+            } else {
+                rgroupList.setRestH(false);
+                rgroupList.setOccurrence(">0");
+                rgroupList.setRequiredRGroupNumber(0);
+            }
+            rgroupList.setRGroups(new ArrayList<RGroup>());
+            rGroupDefinitions.put(rgroupNum, rgroupList);
+        }
+        return rGroupDefinitions;
+    }
+
+    /**
+     * Applies the <code>M  APO</code> lines of one Rgroup member.
+     * Token layout: M, APO, entry count, then (atom position, value) pairs;
+     * value 1 marks the first attachment point, 2 the second and 3 both.
+     * Other values are ignored.
+     * @param groupStr the CTAB text of the Rgroup member
+     * @param eol line separator characters used to split groupStr
+     * @param atomsByLinePosition atoms of the member, by position in the atom block
+     * @param rGroup receives the attachment points
+     */
+    private void parseApoLines(String groupStr, String eol,
+            List<IAtom> atomsByLinePosition, RGroup rGroup) {
+        StringTokenizer lines = new StringTokenizer(groupStr, eol);
+        while (lines.hasMoreTokens()) {
+            String apoLine = lines.nextToken();
+            if (!apoLine.startsWith("M  APO")) {
+                continue;
+            }
+            StringTokenizer stAPO = new StringTokenizer(apoLine);
+            stAPO.nextToken(); // "M"
+            stAPO.nextToken(); // "APO"
+            stAPO.nextToken(); // entry count, not used
+            while (stAPO.hasMoreTokens()) {
+                int pos = Integer.parseInt(stAPO.nextToken());
+                int apo = Integer.parseInt(stAPO.nextToken());
+                IAtom at = atomsByLinePosition.get(pos);
+                switch (apo) {
+                    case 1:
+                        rGroup.setFirstAttachmentPoint(at);
+                        break;
+                    case 2:
+                        rGroup.setSecondAttachmentPoint(at);
+                        break;
+                    case 3:
+                        rGroup.setFirstAttachmentPoint(at);
+                        rGroup.setSecondAttachmentPoint(at);
+                        break;
+                    default:
+                        break;
+                }
+            }
+        }
+    }
+
+    /**
+     * Checks that the input did not end prematurely.
+     * @param line the line just read, null at end of input
+     * @param lineCount number of that line, for the message
+     * @throws CDKException if line is null
+     */
+    private void checkLineNotNull(String line, int lineCount) throws CDKException {
+        if (line == null) {
+            throw new CDKException("RGFile invalid, empty/null line at #" + lineCount);
+        }
+    }
+
+    /**
+     * Checks that a given line starts as expected, according to RGFile format.
+     * @param line the line just read, null at end of input
+     * @param expect required prefix
+     * @param lineCount number of that line, for the message
+     * @throws CDKException if the line is null or has another prefix
+     */
+    private void checkLineBeginsWith(String line, String expect, int lineCount) throws CDKException {
+        if (line == null) {
+            throw new CDKException("RGFile invalid, empty/null line at #" + lineCount);
+        }
+        if (!line.startsWith(expect)) {
+            throw new CDKException("RGFile invalid, line #" + lineCount + " should start with:" + expect + ".");
+        }
+    }
+
+
+}
+
+
diff --git a/src/main/org/openscience/cdk/io/RGroupQueryWriter.java b/src/main/org/openscience/cdk/io/RGroupQueryWriter.java
new file mode 100644
index 00000000000..266b097d297
--- /dev/null
+++ b/src/main/org/openscience/cdk/io/RGroupQueryWriter.java
@@ -0,0 +1,322 @@
+/*
+ * Copyright (C) 2010 Mark Rijnbeek
+ *
+ * Contact: cdk-devel@lists.sourceforge.net
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ * All we ask is that proper credit is given for our work, which includes + * - but is not limited to - adding the above copyright notice to the beginning + * of your source code files, and to any copyright notice that you may + * distribute with programs based on this work. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + */ +package org.openscience.cdk.io; + +import java.io.BufferedWriter; +import java.io.IOException; +import java.io.OutputStream; +import java.io.OutputStreamWriter; +import java.io.StringWriter; +import java.io.Writer; +import java.text.SimpleDateFormat; +import java.util.List; +import java.util.Map; + +import org.openscience.cdk.annotations.TestMethod; +import org.openscience.cdk.exception.CDKException; +import org.openscience.cdk.interfaces.IAtom; +import org.openscience.cdk.interfaces.IAtomContainer; +import org.openscience.cdk.interfaces.IBond; +import org.openscience.cdk.interfaces.IChemObject; +import org.openscience.cdk.io.formats.IResourceFormat; +import org.openscience.cdk.io.formats.RGroupQueryFormat; +import org.openscience.cdk.isomorphism.matchers.IRGroupQuery; +import org.openscience.cdk.isomorphism.matchers.RGroup; +import org.openscience.cdk.isomorphism.matchers.RGroupList; + +/** + * A writer for Symyx' Rgroup files (RGFiles).
+ * An RGfile describes a single molecular query with Rgroups. + * Each RGfile is a combination of Ctabs defining the root molecule and each + * member of each Rgroup in the query. + *
+ * This class relies on the {@link org.openscience.cdk.io.MDLWriter} to + * create CTAB data blocks. + * + * @cdk.module io + * @cdk.githash + * @cdk.keyword Rgroup + * @cdk.keyword R group + * @cdk.keyword R-group + * @author Mark Rijnbeek + */ + +public class RGroupQueryWriter extends DefaultChemObjectWriter { + + private BufferedWriter writer; + private static String LSEP = System.getProperty("line.separator"); + + /** + * Constructs a new writer that can write an {@link IRGroupQuery} + * to the Symx RGFile format. + * + * @param out The Writer to write to + */ + public RGroupQueryWriter(Writer out) { + if (out instanceof BufferedWriter) { + writer = (BufferedWriter)out; + } else { + writer = new BufferedWriter(out); + } + } + + /** + * Zero argument constructor. + */ + public RGroupQueryWriter() { + this(new StringWriter()); + } + + /** + * Returns true for accepted input types. + */ + @SuppressWarnings("unchecked") + @TestMethod("testAccepts") + public boolean accepts(Class classObject) { + Class[] interfaces = classObject.getInterfaces(); + for (Class anInterface : interfaces) { + if (IRGroupQuery.class.equals(anInterface)) return true; + } + Class superClass = classObject.getSuperclass(); + if (superClass != null) return this.accepts(superClass); + return false; + } + + /** + * Flushes the output and closes this object. + */ + @TestMethod("testClose") + public void close() throws IOException { + writer.close(); + } + + /** + * Produces a CTAB block for an atomContainer, without the header lines. + * @param atomContainer + * @return CTAB block + * @throws CDKException + */ + private String getCTAB (IAtomContainer atomContainer) throws CDKException { + StringWriter strWriter = new StringWriter(); + MDLWriter mdlWriter = new MDLWriter(strWriter); + mdlWriter.write(atomContainer); + String ctab = strWriter.toString(); + //strip of the individual header, as we have one super header instead. 
+ for (int line=1; line <=3; line++ ){ + ctab = ctab.substring(ctab.indexOf(LSEP)+1); + } + return ctab; + } + + /** + * Returns output format. + */ + @TestMethod("testGetFormat") + public IResourceFormat getFormat() { + return RGroupQueryFormat.getInstance(); + } + + /** + * Sets the writer to given output stream. + */ + public void setWriter(OutputStream output) throws CDKException { + setWriter(new OutputStreamWriter(output)); + } + + /** + * Sets the writer. + */ + public void setWriter(Writer out) throws CDKException { + if (out instanceof BufferedWriter) { + writer = (BufferedWriter)out; + } else { + writer = new BufferedWriter(out); + } + } + + /** + * The actual writing of the output. + * @throws CDKException + * @throws IOException + */ + public void write(IChemObject object) throws CDKException { + if (!(object instanceof IRGroupQuery)) { + throw new CDKException("Only IRGroupQuery input is accepted."); + } + try { + + IRGroupQuery rGroupQuery = (IRGroupQuery) object; + String now=new SimpleDateFormat("MMddyyHHmm").format(System.currentTimeMillis()); + IAtomContainer rootAtc = rGroupQuery.getRootStructure(); + + //Construct header + StringBuffer rootBlock=new StringBuffer(); + String header = + "$MDL REV 1 "+now+LSEP+ + "$MOL\n" + + "$HDR\n" + + " Rgroup query file (RGFile)\n"+ + " CDK "+now+"2D\n\n"+ + "$END HDR\n"+ + "$CTAB"; + rootBlock.append(header).append(LSEP); + + //Construct the root structure, the scaffold + String rootCTAB = getCTAB(rootAtc); + rootCTAB = rootCTAB.replaceAll("\nM END\n",""); + rootBlock.append(rootCTAB).append(LSEP); + + //Write the root's LOG lines + for(Integer rgrpNum : rGroupQuery.getRGroupDefinitions().keySet()) { + RGroupList rgList = rGroupQuery.getRGroupDefinitions().get(rgrpNum); + int restH = rgList.isRestH()?1:0; + String logLine = + "M LOG"+ + MDLWriter.formatMDLInt(1, 3)+ + MDLWriter.formatMDLInt(rgrpNum, 4)+ + MDLWriter.formatMDLInt(rgList.getRequiredRGroupNumber(), 4)+ + MDLWriter.formatMDLInt(restH, 4)+ + " 
"+rgList.getOccurrence() + ; + rootBlock.append(logLine).append(LSEP); + } + + //AAL lines are optional, they are needed for R-atoms with multiple bonds to the root + //for which the order of the attachment points can not be implicitly derived + //from the order in the atom block. See CT spec for more on that. + for (IAtom rgroupAtom : rGroupQuery.getRootAttachmentPoints().keySet()) { + Map rApo= rGroupQuery.getRootAttachmentPoints().get(rgroupAtom); + if (rApo.size()>1) { + int prevPos=-1; + int apoIdx=1; + boolean implicitlyOrdered=true; + while (rApo.get(apoIdx)!=null && implicitlyOrdered) { + IAtom partner=rApo.get(apoIdx).getConnectedAtom(rgroupAtom); + for(int atIdx=0; atIdx rgrpList = rGroupQuery.getRGroupDefinitions().get(rgrpNum).getRGroups(); + if(rgrpList!=null && rgrpList.size()!=0) { + rgpBlock.append("$RGP").append(LSEP);; + rgpBlock.append(MDLWriter.formatMDLInt(rgrpNum, 4)).append(LSEP); + + for (RGroup rgroup : rgrpList) { + //CTAB block + rgpBlock.append("$CTAB").append(LSEP); + String ctab=getCTAB(rgroup.getGroup()); + ctab = ctab.replaceAll(LSEP+"M END"+LSEP,""); + rgpBlock.append(ctab).append(LSEP); + + //The APO line + IAtom firstAttachmentPoint= rgroup.getFirstAttachmentPoint(); + IAtom secondAttachmentPoint=rgroup.getSecondAttachmentPoint(); + int apoCount=0; + if (firstAttachmentPoint!=null) { + StringBuffer apoLine=new StringBuffer(); + for (int atIdx = 0; atIdx < rgroup.getGroup().getAtomCount(); atIdx++) { + if (rgroup.getGroup().getAtom(atIdx).equals(firstAttachmentPoint)) { + apoLine.append(MDLWriter.formatMDLInt((atIdx+1), 3)); + apoCount++; + if (secondAttachmentPoint!=null && + secondAttachmentPoint.equals(firstAttachmentPoint)) { + apoLine.append(MDLWriter.formatMDLInt(3, 3)); + } + else { + apoLine.append(MDLWriter.formatMDLInt(1, 3)); + } + } + } + if (secondAttachmentPoint!=null && !secondAttachmentPoint.equals(firstAttachmentPoint)) { + for (int atIdx = 0; atIdx < rgroup.getGroup().getAtomCount(); atIdx++) { + if 
(rgroup.getGroup().getAtom(atIdx).equals(secondAttachmentPoint)) { + apoCount++; + apoLine.append(MDLWriter.formatMDLInt((atIdx+1), 3)); + apoLine.append(MDLWriter.formatMDLInt(2, 3)); + } + } + } + if (apoCount>0) { + apoLine.insert(0, "M APO"+MDLWriter.formatMDLInt(apoCount, 3)); + rgpBlock.append(apoLine).append(LSEP); + } + } + + rgpBlock.append("M END").append(LSEP); + rgpBlock.append("$END CTAB").append(LSEP); + } + rgpBlock.append("$END RGP").append(LSEP); + } + } + rgpBlock.append("$END MOL").append(LSEP); + + writer.write(rootBlock.toString()); + writer.write(rgpBlock.toString()); + writer.flush(); + + } catch (IOException e) { + e.printStackTrace(); + throw new CDKException("Unexpected excpetion when writing RGFile.\n"+e.getMessage()); + } + + } +} diff --git a/src/main/org/openscience/cdk/io/formats/RGroupQueryFormat.java b/src/main/org/openscience/cdk/io/formats/RGroupQueryFormat.java new file mode 100644 index 00000000000..ced87924b49 --- /dev/null +++ b/src/main/org/openscience/cdk/io/formats/RGroupQueryFormat.java @@ -0,0 +1,102 @@ +/* + * Copyright (C) 2010 Mark Rijnbeek + * + * Contact: cdk-devel@lists.sourceforge.net + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * All we ask is that proper credit is given for our work, which includes + * - but is not limited to - adding the above copyright notice to the beginning + * of your source code files, and to any copyright notice that you may + * distribute with programs based on this work. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+package org.openscience.cdk.io.formats;
+
+import org.openscience.cdk.annotations.TestMethod;
+import org.openscience.cdk.tools.DataFeatures;
+
+/**
+ * Format for Symyx RGfiles (Rgroup query files).
+ * There is a single shared instance, obtained via {@link #getInstance()}.
+ * @cdk.module ioformats
+ * @cdk.githash
+ * @cdk.set io-formats
+ */
+public class RGroupQueryFormat implements IChemFormatMatcher {
+
+
+    /**
+     * The one instance. Created when the class is initialised rather than
+     * on first use, so concurrent first calls to getInstance() cannot end
+     * up with two different objects.
+     */
+    private static final IResourceFormat myself = new RGroupQueryFormat();
+
+    private RGroupQueryFormat() {
+    }
+
+    /**
+     * @return the shared instance of this format
+     */
+    @TestMethod("testResourceFormatSet")
+    public static IResourceFormat getInstance() {
+        return myself;
+    }
+
+    /**
+     * @return human readable name of the format
+     */
+    @TestMethod("testGetFormatName")
+    public String getFormatName() {
+        return "Symyx Rgroup query files";
+    }
+
+    /**
+     * @return always null, no MIME type is given for this format
+     */
+    @TestMethod("testGetMIMEType")
+    public String getMIMEType() {
+        return null;
+    }
+
+    /**
+     * @return the first entry of {@link #getNameExtensions()}
+     */
+    @TestMethod("testGetPreferredNameExtension")
+    public String getPreferredNameExtension() {
+        return getNameExtensions()[0];
+    }
+
+    /**
+     * @return a fresh array with the file name extensions, preferred one first
+     */
+    @TestMethod("testGetNameExtensions")
+    public String[] getNameExtensions() {
+        return new String[] { "mol", "rgp" };
+    }
+
+    /**
+     * @return fully qualified class name of the reader for this format
+     */
+    @TestMethod("testGetReaderClassName")
+    public String getReaderClassName() {
+        return "org.openscience.cdk.io.RGroupQueryReader";
+    }
+
+    /**
+     * @return fully qualified class name of the writer for this format
+     */
+    @TestMethod("testGetWriterClassName")
+    public String getWriterClassName() {
+        return "org.openscience.cdk.io.RGroupQueryWriter";
+    }
+
+    /**
+     * Recognises the format by the <code>$RGP</code> marker, which may
+     * occur anywhere in the line.
+     * @param lineNumber number of the line in the file, not used
+     * @param line content of the line, must not be null
+     * @return true if the line contains <code>$RGP</code>
+     */
+    public boolean matches(int lineNumber, String line) {
+        return line.indexOf("$RGP") >= 0;
+    }
+
+    /**
+     * @return false, RGFiles are plain text
+     */
+    @TestMethod("testIsXMLBased")
+    public boolean isXMLBased() {
+        return false;
+    }
+
+    /**
+     * @return the required features plus 2D coordinates
+     */
+    @TestMethod("testGetSupportedDataFeatures")
+    public int getSupportedDataFeatures() {
+        return getRequiredDataFeatures() | DataFeatures.HAS_2D_COORDINATES ;
+    }
+
+    /**
+     * @return the atom element symbol feature flag
+     */
+    @TestMethod("testGetRequiredDataFeatures")
+    public int getRequiredDataFeatures() {
+        return DataFeatures.HAS_ATOM_ELEMENT_SYMBOL;
+    }
+}
diff --git a/src/main/org/openscience/cdk/isomorphism/matchers/IRGroupQuery.java b/src/main/org/openscience/cdk/isomorphism/matchers/IRGroupQuery.java
new file mode 100644
index 00000000000..47577a6bc44
--- /dev/null
+++ b/src/main/org/openscience/cdk/isomorphism/matchers/IRGroupQuery.java
@@ -0,0 +1,135 @@
+/*
+ * Copyright (C) 2010 Mark Rijnbeek
+ *
+ * Contact: cdk-devel@lists.sourceforge.net
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ * All we ask is that proper credit is given for our work, which includes
+ * - but is not limited to - adding the above copyright notice to the beginning
+ * of your source code files, and to any copyright notice that you may
+ * distribute with programs based on this work.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+package org.openscience.cdk.isomorphism.matchers;
+
+import java.util.List;
+import java.util.Map;
+
+
+import org.openscience.cdk.PseudoAtom;
+import org.openscience.cdk.exception.CDKException;
+import org.openscience.cdk.interfaces.IAtom;
+import org.openscience.cdk.interfaces.IAtomContainer;
+import org.openscience.cdk.interfaces.IBond;
+import org.openscience.cdk.interfaces.IChemObject;
+
+/**
+ * Interface definition for Rgroup query classes.
These must provide a root + * structure, root attachment points and Rgroup definitions. + * + * @cdk.module isomorphism + * @cdk.githash + * @cdk.keyword Rgroup + * @cdk.keyword R group + * @cdk.keyword R-group + * @author Mark Rijnbeek + */ +public interface IRGroupQuery extends IChemObject { + + + /** + * Setter for the root structure of this R-Group. + * @see #getRootStructure + * @param rootStructure the root structure (or scaffold) container + * + */ + public void setRootStructure(IAtomContainer rootStructure); + + /** + * Getter for the root structure of this R-Group. + * @see #setRootStructure + * @return the root structure (or scaffold) container + */ + public IAtomContainer getRootStructure(); + + /** + * Setter for root attachment points = bonds that connect R pseudo-atoms to the scaffold. + * @see #getRootAttachmentPoints() + * @param rootAttachmentPoints Map with per R-group pseudo atom another map with an Integer and an IBond, the integer indicating 1st or 2nd attachment. + */ + public void setRootAttachmentPoints(Map> rootAttachmentPoints); + + /** + * Getter for root attachment points = bonds that connect R pseudo-atoms to the scaffold. + * @see #setRootAttachmentPoints(Map) + * @return Map with per R-group pseudo atom another map with an Integer and an IBond, the integer indicating 1st or 2nd attachment. + */ + public Map> getRootAttachmentPoints(); + + /** + * Setter for the R-group definitions (substituents). + * @see #getRGroupDefinitions + * @param rGroupDefinitions map with an Integer and an RGroupList (substituent list), the Integer being the R-Group number (1..32). + */ + public void setRGroupDefinitions(Map rGroupDefinitions); + + /** + * Getter for the R-group definitions (substituents). + * @see #setRGroupDefinitions + * @return rGroupDefinitions Map with an Integer and an RGroupList (substituent list), the Integer being the R-Group number (1..32). 
+ */ + public Map getRGroupDefinitions(); + + /** + * Return the total number of atom containers (count the root plus all substituents). + * @return count. + */ + public int getAtomContainerCount(); + + /** + * Return all the substituent atom containers, in other words the atom containers + * defined in this RGroupQuery except for the root structure. + * @return list with all substituents + */ + public List getSubstituents(); + + /** + * Checks validity of the RGroupQuery. + * Each distinct R# in the root must have a + * a corresponding {@link RGroupList} definition.
+ * In file terms: $RGP blocks must be defined for each R-group number. + * @return true when valid + */ + public boolean areSubstituentsDefined(); + + + /** + * Checks validity of RGroupQuery. + * Each {@link RGroupList} definition must have one or more corresponding + * R# atoms in the root block. + * @return true when valid + */ + public boolean areRootAtomsDefined(); + + /** + * Produces all combinations of the root structure (scaffold) with the R-groups + * substituted in valid ways, using each R-group's definitions and conditions. + * @return all valid combinations of the root structure (scaffold) with the + * R-groups substituted. + * @throws Exception + */ + public List getAllConfigurations() throws Exception; + +} diff --git a/src/main/org/openscience/cdk/isomorphism/matchers/RGroup.java b/src/main/org/openscience/cdk/isomorphism/matchers/RGroup.java new file mode 100644 index 00000000000..4dfa8f06d4e --- /dev/null +++ b/src/main/org/openscience/cdk/isomorphism/matchers/RGroup.java @@ -0,0 +1,88 @@ +/* + * Copyright (C) 2010 Mark Rijnbeek + * + * Contact: cdk-devel@lists.sourceforge.net + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * All we ask is that proper credit is given for our work, which includes + * - but is not limited to - adding the above copyright notice to the beginning + * of your source code files, and to any copyright notice that you may + * distribute with programs based on this work. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + */ +package org.openscience.cdk.isomorphism.matchers; + +import org.openscience.cdk.interfaces.IAtom; +import org.openscience.cdk.interfaces.IAtomContainer; + +/** + * Represents a single substitute structure in an {@link RGroupList}.

+ * The order of attachment points is provided (first and second only, conform + * RGFile spec). This order is relevant when the structure connects to the root + * with more than one bond. + *

+ * See also {@link RGroupList} and {@link RGroupQuery}. + * + * @cdk.module isomorphism + * @cdk.githash + * @cdk.keyword Rgroup + * @cdk.keyword R group + * @cdk.keyword R-group + * @author Mark Rijnbeek + */ +public class RGroup { + + /** + * Standard label/title to be used for the root atom container. + */ + public final static String ROOT_LABEL="Root structure"; + + /** + * Makes a label/title to be used for a substituent. + * @param rgroupNum R-Group number (1..32) + * @return label for substituent, like "R3" + */ + public static String makeLabel(int rgroupNum ) { + return "(R"+rgroupNum+")"; + } + + IAtom firstAttachmentPoint; + IAtom secondAttachmentPoint; + IAtomContainer group; + + public void setFirstAttachmentPoint(IAtom firstAttachmentPoint) { + this.firstAttachmentPoint = firstAttachmentPoint; + } + + public IAtom getFirstAttachmentPoint() { + return firstAttachmentPoint; + } + + public void setSecondAttachmentPoint(IAtom secondAttachmentPoint) { + this.secondAttachmentPoint = secondAttachmentPoint; + } + + public IAtom getSecondAttachmentPoint() { + return secondAttachmentPoint; + } + + public void setGroup(IAtomContainer group) { + this.group = group; + } + + public IAtomContainer getGroup() { + return group; + } +} diff --git a/src/main/org/openscience/cdk/isomorphism/matchers/RGroupList.java b/src/main/org/openscience/cdk/isomorphism/matchers/RGroupList.java new file mode 100644 index 00000000000..1c6b8cf89bf --- /dev/null +++ b/src/main/org/openscience/cdk/isomorphism/matchers/RGroupList.java @@ -0,0 +1,316 @@ +/* + * Copyright (C) 2010 Mark Rijnbeek + * + * Contact: cdk-devel@lists.sourceforge.net + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. 
+ * All we ask is that proper credit is given for our work, which includes + * - but is not limited to - adding the above copyright notice to the beginning + * of your source code files, and to any copyright notice that you may + * distribute with programs based on this work. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + */ +package org.openscience.cdk.isomorphism.matchers; + +import java.util.ArrayList; +import java.util.List; +import java.util.StringTokenizer; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.openscience.cdk.exception.CDKException; + + +/** + * Represents a list of Rgroup substitutes to be associated with some + * {@link RGroupQuery}. + * + * @cdk.module isomorphism + * @cdk.githash + * @cdk.keyword Rgroup + * @cdk.keyword R group + * @cdk.keyword R-group + * @author Mark Rijnbeek + */ +public class RGroupList { + /** + * Default value for occurrence field. + */ + public final static String DEFAULT_OCCURRENCE=">0"; + + /** + * Unique number to identify the Rgroup. + */ + private int rGroupNumber; + + /** + * Indicates that sites labeled with this Rgroup may only be + * substituted with a member of the Rgroup or with hydrogen. + */ + private boolean restH; + + /** + * Occurrence required: + *

+     * <UL>
+     * <LI>n : exactly n ;</LI>
+     * <LI>n - m : n through m ;</LI>
+     * <LI>&gt; n : greater than n ;</LI>
+     * <LI>&lt; n : fewer than n ;</LI>
+     * <LI>default (blank) is &gt; 0 ;</LI>
+     * </UL>
+ * Any non-contradictory combination of the preceding values is also + * allowed; for example "1, 3-7, 9, >11". + */ + private String occurrence; + + /** + * List of substitute structures. + */ + private List rGroups; + + /** + * The rGroup (say B) that is required when this one (say A) exists.

+ * This captures the "LOG" information 'IF A (this) THEN B'. + */ + private int requiredRGroupNumber; + + + /** + * Default constructor. + */ + public RGroupList(int rGroupNumber) { + setRGroupNumber(rGroupNumber); + this.restH = false; + this.occurrence = DEFAULT_OCCURRENCE; + this.requiredRGroupNumber=0; + } + + /** + * Constructor with attributes given. + * + * @param rGroupNumber R-Group number + * @param restH restH + * @param occurrence occurrence + * @param requiredRGroupNumber number of other R-Group required + * @throws CDKException + */ + public RGroupList(int rGroupNumber, boolean restH, String occurrence, int requiredRGroupNumber) throws CDKException { + setRGroupNumber(rGroupNumber); + setRestH(restH); + setOccurrence(occurrence); + setRequiredRGroupNumber(requiredRGroupNumber); + } + + /** + * Setter for rGroupNumber, checks for valid range. + * Spec: "value from 1 to 32 *, labels position of Rgroup on root." + * @param rGroupNumber R-Group number + */ + public void setRGroupNumber(int rGroupNumber) { + + if (rGroupNumber < 1 || rGroupNumber > 32) { + throw new RuntimeException("Rgroup number must be between 1 and 32."); + } + this.rGroupNumber = rGroupNumber; + } + + public int getRGroupNumber() { + return rGroupNumber; + } + + public void setRestH(boolean restH) { + this.restH = restH; + } + + public boolean isRestH() { + return restH; + } + + public void setRequiredRGroupNumber(int rGroupNumberImplicated) { + this.requiredRGroupNumber = rGroupNumberImplicated; + } + + public int getRequiredRGroupNumber() { + return requiredRGroupNumber; + } + + public void setRGroups(List rGroups) { + this.rGroups = rGroups; + } + + public List getRGroups() { + return rGroups; + } + + /** + * Returns the occurrence value. + * @return occurrence + */ + public String getOccurrence() { + return occurrence; + } + + /** + * Picky setter for occurrence fields. Validates user input to be conform + * the (Symyx) specification. 
+ * @param occurrence occurence value + */ + public void setOccurrence(String occurrence) throws CDKException { + if (occurrence == null || occurrence.equals("")) { + occurrence = ">0"; //revert to default + } else { + occurrence = occurrence.trim().replaceAll(" ", ""); + if (isValidOccurrenceSyntax(occurrence)) { + this.occurrence = occurrence; + } else + throw new CDKException("Invalid occurence line: " + occurrence); + } + } + + /** + * Validates the occurrence value. + *

+     * <UL>
+     * <LI>n : exactly n ;</LI>
+     * <LI>n - m : n through m ;</LI>
+     * <LI>&gt; n : greater than n ;</LI>
+     * <LI>&lt; n : fewer than n ;</LI>
+     * <LI>default (blank) is &gt; 0 ;</LI>
+     * </UL>
+ * Any combination of the preceding values is also + * allowed; for example "1, 3-7, 9, >11". + * @param occ String to validate. + * @return true if valid String provided. + */ + public static boolean isValidOccurrenceSyntax(String occ) { + StringTokenizer st = new StringTokenizer(occ, ","); + while (st.hasMoreTokens()) { + String cond = st.nextToken().trim().replaceAll(" ", ""); + do { + //Number: "n" + if (match("^\\d+$", cond)) { + if (new Integer(cond)<0) // not allowed + return false; + break; + } + //Range: "n-m" + if (match("^\\d+-\\d+$", cond)) { + int from = new Integer(cond.substring(0,cond.indexOf("-"))); + int to = new Integer(cond.substring(cond.indexOf("-")+1,cond.length())); + if (from<0 || to <0 || ton" + if (match("^>\\d+$", cond)) { + break; + } + + return false; + } while (1==0); + } + + return true; + } + + /** + * Helper method for regular expression matching. + * @param regExp regular expression String + * @param userInput user's input + * @return + */ + private static boolean match(String regExp, String userInput) { + Pattern pattern = Pattern.compile(regExp); + Matcher matcher = pattern.matcher(userInput); + if (matcher.find()) + return true; + else + return false; + } + + + /** + * Matches the 'occurrence' condition with a provided maximum number of + * RGroup attachments. Returns the valid occurrences (numeric) for these + * two combined. If none found, returns empty list.

+ * Example: if R1 occurs 3 times attached to some root structure, then + * stating ">5" as an occurrence for that RGoupList does not make + * sense: the example R1 can occur 0..3 times. Empty would be returned.
+ * If the occurence would be >2, then 3 would be returned. Etcetera. + * + * @param maxAttachments number of attachments + * @return valid values by combining a max for R# with the occurrence cond. + */ + public List matchOccurence(int maxAttachments) { + + List validValues = new ArrayList(); + + for (int val = 0; val <= maxAttachments; val++) { + boolean addVal=false; + + StringTokenizer st = new StringTokenizer(occurrence, ","); + while (st.hasMoreTokens() && !addVal) { + String cond = st.nextToken().trim().replaceAll(" ", ""); + if (match("^\\d+$", cond)) { // n + if(new Integer(cond)==val) + addVal=true; + } + if (match("^\\d+-\\d+$", cond)) { // n-m + int from = new Integer(cond.substring(0,cond.indexOf("-"))); + int to = new Integer(cond.substring(cond.indexOf("-")+1,cond.length())); + if ( val>=from && val <=to) { + addVal=true; + } + } + if (match("^>\\d+$", cond)) { // ")+1,cond.length())); + if(val>n){ + addVal=true; + } + } + if (match("^<\\d+$", cond)) { // >n + int n = new Integer(cond.substring(cond.indexOf("<")+1,cond.length())); + if(val + * + * Contact: cdk-devel@lists.sourceforge.net + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * All we ask is that proper credit is given for our work, which includes + * - but is not limited to - adding the above copyright notice to the beginning + * of your source code files, and to any copyright notice that you may + * distribute with programs based on this work. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + */ +package org.openscience.cdk.isomorphism.matchers; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import javax.vecmath.Point2d; + +import org.openscience.cdk.CDKConstants; +import org.openscience.cdk.ChemObject; +import org.openscience.cdk.PseudoAtom; +import org.openscience.cdk.exception.CDKException; +import org.openscience.cdk.interfaces.IAtom; +import org.openscience.cdk.interfaces.IAtomContainer; +import org.openscience.cdk.interfaces.IBond; +import org.openscience.cdk.interfaces.IChemObject; +import org.openscience.cdk.tools.ILoggingTool; +import org.openscience.cdk.tools.LoggingToolFactory; + + +/** + * Represents information contained in a Symyx RGfile (R-group query file).
+ * It contains a root structure (the scaffold if you like), a map with + * R-group definitions (each of which can contain multiple substitutes) and + * a map with attachment points. The attachment points define a connection + * order for the substitutes, which is relevant when an Rgroup is connected + * to the scaffold with more than one bond. + *

+ * This class can also be used to produce all the valid configurations + * for the combination of its root,definitions and conditions. + *

+ * This Javadoc does not contain a code sample how to create a new RGroupQuery + * from scratch, because a sensible RGroupQuery has quite a few attributes to be set + * including a root plus a bunch of substituents, which are all atom containers. + * So that would be a lot of sample code here.
+ * The best way to get a feel for the way the RGroup objects are populated is to + * run the {@link org.openscience.cdk.io.RGroupQueryReaderTest} and look at the sample + * input RGroup query files contained in the CDK and how they translate into + * RGroupXX objects. The JChempaint application can visualize the input files for you. + * + * @cdk.module isomorphism + * @cdk.githash + * @cdk.keyword Rgroup + * @cdk.keyword R group + * @cdk.keyword R-group + * @author Mark Rijnbeek + */ +public class RGroupQuery extends ChemObject implements IChemObject, Serializable, IRGroupQuery { + + private static final long serialVersionUID = -1656116487614720605L; + + private static ILoggingTool logger = LoggingToolFactory.createLoggingTool(RGroupQuery.class); + + /** + * The root structure (or scaffold) to which R-groups r attached. + */ + private IAtomContainer rootStructure; + + /** + * Rgroup definitions, each a list of possible substitutes for the + * given R number. + */ + private Map rGroupDefinitions; + + /** + * For each Rgroup Atom there may be a map containing (number,bond), + * being the attachment order (1,2) and the bond to attach to. + */ + private Map> rootAttachmentPoints; + + + /** + * Returns all R# type atoms (pseudo atoms) found in the root structure + * for a certain provided RGgroup number.

+ * @param rgroupNumber R# number, 1..32 + * @return list of (pseudo) atoms with the provided rgroupNumber as label + */ + public List getRgroupQueryAtoms(Integer rgroupNumber) { + + List rGroupQueryAtoms = null; + + if (rootStructure != null) { + rGroupQueryAtoms = new ArrayList(); + + for (int i = 0; i < rootStructure.getAtomCount(); i++) { + IAtom atom = rootStructure.getAtom(i); + if (atom instanceof PseudoAtom) { + PseudoAtom rGroup = (PseudoAtom)atom; + if (!rGroup.getLabel().equals("R") && // just "R" is not a proper query atom + rGroup.getLabel().startsWith("R") && + (rgroupNumber == null || new Integer(rGroup.getLabel().substring(1)).equals(rgroupNumber))) + rGroupQueryAtoms.add(atom); + } + } + } + return rGroupQueryAtoms; + } + + + /** + * Returns all R# type atoms (pseudo atoms) found in the root structure. + * @return list of (pseudo) R# atoms + */ + public List getAllRgroupQueryAtoms() { + return getRgroupQueryAtoms(null); + } + + + private static Pattern validLabelPattern = Pattern.compile("^R\\d+$"); + + /** + * Validates a Pseudo atom's label to be valid RGroup query label (R1..R32). 
+ * @param Rxx R-group label like R1 or R10 + * @return true if R1..R32, otherwise false + */ + public static boolean isValidRgroupQueryLabel(String Rxx) { + Matcher matcher = validLabelPattern.matcher(Rxx); + if (matcher.find()) { + int groupNumber = new Integer(Rxx.substring(1)); + if (groupNumber >= 1 && groupNumber <= 32) { + return true; + } + } + return false; + } + + public boolean areSubstituentsDefined() { + List allRgroupAtoms = getAllRgroupQueryAtoms(); + if (allRgroupAtoms == null) + return false; + + for (IAtom rgp : allRgroupAtoms) { + if (RGroupQuery.isValidRgroupQueryLabel(((PseudoAtom)rgp).getLabel())) { + int groupNum = new Integer(((PseudoAtom)rgp).getLabel().substring(1)); + if (rGroupDefinitions == null || rGroupDefinitions.get(groupNum) == null || + rGroupDefinitions.get(groupNum).getRGroups() == null || + rGroupDefinitions.get(groupNum).getRGroups().size() == 0) { + return false; + } + } + } + return true; + } + + public boolean areRootAtomsDefined() { + for (Integer rgpNum : rGroupDefinitions.keySet()) { + boolean represented=false; + rootLoop: + for (IAtom rootAtom : this.getRootStructure().atoms()) { + if (rootAtom instanceof PseudoAtom && rootAtom.getSymbol().startsWith("R")) { + PseudoAtom pseudo = (PseudoAtom) rootAtom; + if(pseudo.getLabel().length()>1) { + int rootAtomRgrpNumber = new Integer(pseudo.getLabel().substring(1)); + if (rootAtomRgrpNumber==rgpNum) { + represented=true; + break rootLoop; + } + } + } + } + if(!represented) { + return false; + } + } + return true; + } + + + public List getAllConfigurations() throws CDKException { + + if (!areSubstituentsDefined()) { + throw new CDKException("Can not configure molecules: missing R# group definitions."); + } + + //result = a list of concrete atom containers that are valid interpretations of the RGroup query + List result = new ArrayList(); + + + //rGroupNumbers = list holding each R# number for this RGroup query + List rGroupNumbers = new ArrayList(); + + //distributions = a 
list of valid distributions, that is a one/zero representation + // indicating which atom in an atom series belonging to a particular + // R# group is present (1) or absent (0). + List distributions = new ArrayList(); + + + List> substitutes = new ArrayList>(); + + //Valid occurrences for each R# group + List> occurrences = new ArrayList>(); + List occurIndexes = new ArrayList(); + + //Build up each R# group data before recursively finding configurations. + Iterator rGroupNumItr = rGroupDefinitions.keySet().iterator(); + if (rGroupNumItr.hasNext()) { + while (rGroupNumItr.hasNext()) { + int r = rGroupNumItr.next(); + rGroupNumbers.add(r); + List validOcc = rGroupDefinitions.get(r).matchOccurence(getRgroupQueryAtoms(r).size()); + if (validOcc.size() == 0) { + throw new CDKException("Occurrence '" + rGroupDefinitions.get(r).getOccurrence() + + "' defined for Rgroup " + r + + " results in no subsititute options for this R-group."); + } + occurrences.add(validOcc); + occurIndexes.add(0); + } + //Init distributions: empty and with the right list size + for (int i = 0; i < rGroupNumbers.size(); i++) { + distributions.add(null); + substitutes.add(null); + } + + //Start finding valid configurations using recursion, output will be put in 'result'. + findConfigurationsRecursively(rGroupNumbers, occurrences, occurIndexes, distributions, substitutes, 0, + result); + + } + return result; + } + + + /** + * Recursive function to produce valid configurations + * for {@link #getAllConfigurations()}. + */ + private void findConfigurationsRecursively(List rGroupNumbers, List> occurrences, + List occurIndexes, List distributions, + List> substitutes, int level, + List result) throws CDKException { + + if (level == rGroupNumbers.size()) { + + if (!checkIfThenConditionsMet(rGroupNumbers, distributions)) + return; + + + // Clone the root to get a scaffold to plug the substitutes into. 
+ IAtomContainer root = this.getRootStructure(); + IAtomContainer rootClone = null; + try { + rootClone = (IAtomContainer)root.clone(); + } catch (CloneNotSupportedException e) { + //Abort with CDK exception + throw new CDKException("clone() failed; could not perform R-group substitution."); + } + + for (int rgpIdx = 0; rgpIdx < rGroupNumbers.size(); rgpIdx++) { + + int rNum = rGroupNumbers.get(rgpIdx); + int pos = 0; + + List mapped = substitutes.get(rgpIdx); + for (RGroup substitute : mapped) { + IAtom rAtom = this.getRgroupQueryAtoms(rNum).get(pos); + if (substitute !=null) { + + IAtomContainer rgrpClone = null; + try { + rgrpClone = (IAtomContainer)(substitute.getGroup().clone()); + } catch (CloneNotSupportedException e) { + throw new CDKException("clone() failed; could not perform R-group substitution."); + } + + //root cloned, substitute cloned. These now need to be attached to each other.. + rootClone.add(rgrpClone); + + Map rAttachmentPoints = this.getRootAttachmentPoints().get(rAtom); + if (rAttachmentPoints != null) { + // Loop over attachment points of the R# atom + for (int apo = 0; apo < rAttachmentPoints.size(); apo++) { + IBond bond = rAttachmentPoints.get(apo + 1); + //Check how R# is attached to bond + int whichAtomInBond = 0; + if (bond.getAtom(1).equals(rAtom)) + whichAtomInBond = 1; + IAtom subsAt = null; + if (apo == 0) + subsAt = substitute.getFirstAttachmentPoint(); + else + subsAt = substitute.getSecondAttachmentPoint(); + + //Do substitution with the clones + IBond cloneBond = rootClone.getBond(getBondPosition(bond, root)); + if (subsAt != null) { + IAtom subsCloneAtom = + rgrpClone.getAtom(getAtomPosition(subsAt, substitute.getGroup())); + cloneBond.setAtom(subsCloneAtom, whichAtomInBond); + } + } + } + + //Optional: shift substitutes 2D for easier visual checking + if (rAtom.getPoint2d() != null && substitute != null && + substitute.getFirstAttachmentPoint() != null && + substitute.getFirstAttachmentPoint().getPoint2d() != null) { + 
Point2d pointR = rAtom.getPoint2d(); + Point2d pointC = substitute.getFirstAttachmentPoint().getPoint2d(); + double xDiff = pointC.x - pointR.x; + double yDiff = pointC.y - pointR.y; + for (IAtom subAt : rgrpClone.atoms()) { + if (subAt.getPoint2d() != null) { + subAt.getPoint2d().x -= xDiff; + subAt.getPoint2d().y -= yDiff; + } + } + } + } else { + //Distribution flag is 0, this means the R# group will not be substituted. + //Any atom connected to this group should be given the defined RestH value. + IAtom discarded = rootClone.getAtom(getAtomPosition(rAtom, root)); + for (IBond r0Bond : rootClone.bonds()) { + if (r0Bond.contains(discarded)) { + for (IAtom atInBond : r0Bond.atoms()) { + atInBond.setProperty(CDKConstants.REST_H, + this.getRGroupDefinitions().get(rNum).isRestH()); + } + } + } + } + + pos++; + } + } + + //Remove R# remnants from the clone, bonds and atoms that may linger. + boolean confHasRGroupBonds = true; + while (confHasRGroupBonds) { + for (IBond cloneBond : rootClone.bonds()) { + boolean removeBond = false; + if (cloneBond.getAtom(0) instanceof PseudoAtom && + isValidRgroupQueryLabel(((PseudoAtom)cloneBond.getAtom(0)).getLabel())) + removeBond = true; + else if (cloneBond.getAtom(1) instanceof PseudoAtom && + isValidRgroupQueryLabel(((PseudoAtom)cloneBond.getAtom(1)).getLabel())) + removeBond = true; + + if (removeBond) { + rootClone.removeBond(cloneBond); + confHasRGroupBonds = true; + break; + } + confHasRGroupBonds = false; + } + } + boolean confHasRGroupAtoms = true; + while (confHasRGroupAtoms) { + for (IAtom cloneAt : rootClone.atoms()) { + if (cloneAt instanceof PseudoAtom) + if (isValidRgroupQueryLabel(((PseudoAtom)cloneAt).getLabel())) { + rootClone.removeAtom(cloneAt); + confHasRGroupAtoms = true; + break; + } + confHasRGroupAtoms = false; + } + } + //Add to result list + result.add(rootClone); + + + } else { + for (int idx = 0; idx < occurrences.get(level).size(); idx++) { + occurIndexes.set(level, idx); + //With an occurrence picked 
0..n for this level's R-group, now find + //all possible distributions (positional alternatives). + int occurrence = occurrences.get(level).get(idx); + int positions = this.getRgroupQueryAtoms(rGroupNumbers.get(level)).size(); + Integer[] candidate = new Integer[positions]; + for (int j = 0; j < candidate.length; j++) { + candidate[j] = 0; + } + List rgrpDistributions = new ArrayList(); + findDistributions(occurrence, candidate, rgrpDistributions, 0); + + for (Integer[] distribution : rgrpDistributions) { + distributions.set(level, distribution); + + + RGroup[] mapping = new RGroup[distribution.length]; + List> mappedSubstitutes = new ArrayList>(); + mapSubstitutes(this.getRGroupDefinitions().get(rGroupNumbers.get(level)),0, distribution, mapping, mappedSubstitutes); + + for (List mappings : mappedSubstitutes) { + substitutes.set(level,mappings); + findConfigurationsRecursively(rGroupNumbers, occurrences, occurIndexes, distributions, + substitutes, level + 1, result); + + } + } + } + } + } + + /** + * Finds valid distributions for a given R# group and it occurrence + * condition taken from the LOG line.
+ * For example: if we have three Rn group atoms, and ">2" for + * the occurrence, then there are fours possible ways to make a + * distribution: 3 ways to put in two atoms, and one way + * to put in all 3 atoms. Etc. + * @param occur + * @param candidate + * @param distributions + * @param level + */ + private void findDistributions(int occur, Integer[] candidate, List distributions, int level) { + if (level != candidate.length) { + for (int i = 0; i < 2; i++) { + candidate[level] = i; + + int sum = 0; + for (int x = 0; x < candidate.length; x++) + sum += candidate[x]; + + if (sum == occur) { + distributions.add(candidate.clone()); + } else { + findDistributions(occur, candidate, distributions, level + 1); + } + } + } + } + + + /** + * Maps the distribution of an R-group to all possible substitute combinations. + * This is best illustrated by an example.
+ * Say R2 occurs twice in the root, and has condition >0. So a valid + * output configuration can have either one or two substitutes. + * The distributions will have been calculated to be the following + * solutions: [0,1], [1,0], [1,1]
+ * To start with [1,1], assume two possible substitutes have been + * defined for R2, namely *C=O and *C-N. Then the distribution [1,1] + * should lead to four mappings:
+ * [*C=O,*C=O], [*C-N,*C-N], [*C=O,*C-N], [*C-N,*C=O].
+ * These mappings are generated in this function, as well as the other valid mappings + * for [0,1] and [1,0]:
+ * [*C=O,null], [*C-N,null], [null,*C=O], [null,*C-N].
+ * So the example would have this function produce eight mappings in total, over three calls (one call per distribution). + * + * @param rgpList R-group list whose members (getRGroups()) are the candidate substitutes + * @param listOffset position in distribution and mapping handled by this call; callers start at 0 + * @param distribution 0/1 flag per position; a 0 leaves that position unsubstituted (null) + * @param mapping working array indexed like distribution; overwritten during the recursion + * @param result output list; one list of RGroups is appended for every completed mapping + */ + private void mapSubstitutes(RGroupList rgpList, int listOffset, Integer[] distribution, RGroup[] mapping, List> result) { + if(listOffset==distribution.length) { + List mapped= new ArrayList(); + for(RGroup rgrp : mapping) + mapped.add(rgrp); + result.add(mapped); + } + else { + if (distribution[listOffset]==0) { + mapping[listOffset]=null; + mapSubstitutes(rgpList, listOffset+1, distribution, mapping, result); + } + else { + for (RGroup rgrp :rgpList.getRGroups()) { + mapping[listOffset]=rgrp; + mapSubstitutes(rgpList, listOffset+1, distribution, mapping, result); + } + } + } + } + + + /** + * Helper method, used to help construct a configuration. + * @param atom the atom to look for (compared with equals) + * @param container the container whose atoms are searched in index order + * @return the array position of atom in container, or -1 if it is not found + */ + private int getAtomPosition(IAtom atom, IAtomContainer container) { + for (int i = 0; i < container.getAtomCount(); i++) { + if (atom.equals(container.getAtom(i))) { + return i; + } + } + return -1; + } + + /** + * Helper method, used to help construct a configuration. + * @param bond the bond to look for (compared with equals) + * @param container the container whose bonds are searched in index order + * @return the array position of the bond in the container, or -1 if it is not found + */ + private int getBondPosition(IBond bond, IAtomContainer container) { + for (int i = 0; i < container.getBondCount(); i++) { + if (bond.equals(container.getBond(i))) { + return i; + } + } + return -1; + } + + /** + * Helper method to see if an array is all zeroes or not. + * Used to check if the distribution of substitutes over an R-group + * is all zeroes, meaning there will be no substitution done. + * @param arr the array of flags to inspect + * @return true if arr's values are all zero. 
+ */ + private boolean allZeroArray(Integer[] arr) { + for (int flag : arr) + if (flag != 0) + return false; + return true; + } + + /** + * Checks whether IF..THEN conditions that can be set for the R-groups are met. + * It is used to filter away invalid configurations in {@link #findConfigurationsRecursively}. + *

+ * Scenario: suppose R1 is substituted 0 times, whereas R2 is substituted. + * Also suppose there is a condition IF R2 THEN R1. Because R1 does not + * occur but R2 does, the IF..THEN condition is not met: this function + * will return false, the configuration should be discarded. + * @param rGroupNumbers the R-group numbers, index-aligned with distributions + * @param distributions one 0/1 flag array per R-group; all zeroes means that R-group is not substituted + * @return true if all IF..THEN RGroup conditions are met. + */ + private boolean checkIfThenConditionsMet(List rGroupNumbers, List distributions) { + for (int outer = 0; outer < rGroupNumbers.size(); outer++) { + int rgroupNum = rGroupNumbers.get(outer); + if (allZeroArray(distributions.get(outer))) { + for (int inner = 0; inner < rGroupNumbers.size(); inner++) { + int rgroupNum2 = rGroupNumbers.get(inner); + if (!allZeroArray(distributions.get(inner))) { + RGroupList rgrpList = rGroupDefinitions.get(rgroupNum2); + if (rgrpList.getRequiredRGroupNumber() == rgroupNum) { + logger.info(" Rejecting >> all 0 for " + rgroupNum + " but requirement found from " + + rgrpList.getRGroupNumber()); + return false; + } + } + } + } + } + return true; + } + + public int getAtomContainerCount() { + int retVal=0; + if(this.rootStructure!=null) + retVal++; + for(Integer r: rGroupDefinitions.keySet()) { + for (RGroup rgrp : rGroupDefinitions.get(r).getRGroups()) { + if (rgrp.getGroup()!=null) { + retVal++; + } + } + } + return retVal; + } + + + public List getSubstituents() { + List substitutes = new ArrayList(); + for(Integer r : rGroupDefinitions.keySet()) { + for (RGroup rgrp : rGroupDefinitions.get(r).getRGroups()) { + IAtomContainer subst =rgrp.getGroup(); + if (subst!=null) + substitutes.add(subst); + } + } + return substitutes; + } + + public void setRootStructure(IAtomContainer rootStructure) { + this.rootStructure = rootStructure; + } + + public IAtomContainer getRootStructure() { + return rootStructure; + } + + public void setRootAttachmentPoints(Map> rootAttachmentPoints) { + this.rootAttachmentPoints = rootAttachmentPoints; + } + + public Map> 
getRootAttachmentPoints() { + return rootAttachmentPoints; + } + + public void setRGroupDefinitions(Map rGroupDefinitions) { + this.rGroupDefinitions = rGroupDefinitions; + } + + public Map getRGroupDefinitions() { + return rGroupDefinitions; + } +} diff --git a/src/test/org/openscience/cdk/io/ChemObjectIOTest.java b/src/test/org/openscience/cdk/io/ChemObjectIOTest.java index fee7e1b64ff..66da7b07885 100644 --- a/src/test/org/openscience/cdk/io/ChemObjectIOTest.java +++ b/src/test/org/openscience/cdk/io/ChemObjectIOTest.java @@ -39,6 +39,7 @@ import org.openscience.cdk.io.formats.IResourceFormat; import org.openscience.cdk.io.listener.IChemObjectIOListener; import org.openscience.cdk.io.setting.IOSetting; +import org.openscience.cdk.isomorphism.matchers.RGroupQuery; import org.openscience.cdk.nonotify.NNAtomContainer; import org.openscience.cdk.nonotify.NNAtomContainerSet; import org.openscience.cdk.nonotify.NNChemFile; @@ -107,7 +108,7 @@ public static void setChemObjectIO(IChemObjectIO aChemObjectIO) { protected static IChemObject[] acceptableChemObjects = { new ChemFile(), new ChemModel(), new Molecule(), - new Reaction() + new Reaction(), new RGroupQuery() }; @Test public void testAcceptsAtLeastOneChemObject() { diff --git a/src/test/org/openscience/cdk/io/RGroupQueryReaderTest.java b/src/test/org/openscience/cdk/io/RGroupQueryReaderTest.java new file mode 100644 index 00000000000..48660541cde --- /dev/null +++ b/src/test/org/openscience/cdk/io/RGroupQueryReaderTest.java @@ -0,0 +1,394 @@ +/* + * Copyright (C) 2010 Mark Rijnbeek + * + * Contact: cdk-devel@lists.sourceforge.net + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. 
+ * All we ask is that proper credit is given for our work, which includes + * - but is not limited to - adding the above copyright notice to the beginning + * of your source code files, and to any copyright notice that you may + * distribute with programs based on this work. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + */ +package org.openscience.cdk.io; + +import java.io.InputStream; + +import java.util.Iterator; +import java.util.List; +import java.util.Map; + +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; + +import org.openscience.cdk.CDKConstants; +import org.openscience.cdk.Molecule; +import org.openscience.cdk.PseudoAtom; +import org.openscience.cdk.exception.CDKException; +import org.openscience.cdk.interfaces.IAtom; +import org.openscience.cdk.interfaces.IAtomContainer; +import org.openscience.cdk.interfaces.IBond; +import org.openscience.cdk.io.formats.IChemFormat; +import org.openscience.cdk.io.formats.RGroupQueryFormat; +import org.openscience.cdk.isomorphism.matchers.RGroup; +import org.openscience.cdk.isomorphism.matchers.RGroupList; +import org.openscience.cdk.isomorphism.matchers.RGroupQuery; +import org.openscience.cdk.tools.ILoggingTool; +import org.openscience.cdk.tools.LoggingToolFactory; + + +/** + * JUnit tests for {@link org.openscience.cdk.io.RGroupQueryReader}. 
+ * @cdk.module test-io + * @author Mark Rijnbeek + */ +public class RGroupQueryReaderTest extends SimpleChemObjectReaderTest { + public RGroupQueryReaderTest() { + } + private static ILoggingTool logger = LoggingToolFactory.createLoggingTool(RGroupQueryReaderTest.class); + + @BeforeClass + public static void setup() { + setSimpleChemObjectReader(new RGroupQueryReader(), "data/mdl/rgfile.1.mol"); + } + + @Test + public void testAccepts() { + RGroupQueryReader reader = new RGroupQueryReader(); + Assert.assertFalse(reader.accepts(Molecule.class)); + Assert.assertTrue(reader.accepts(RGroupQuery.class)); + } + + public void testAcceptsAtLeastOneDebugObject() { + } + + public void testAcceptsAtLeastOneNonotifyObject() { + } + + /** + * Test that the format factory guesses the correct IChemFormat + * based on the file content. + * + * @throws Exception + */ + @Test + public void testRGFileFormat() throws Exception { + String filename = "data/mdl/rgfile.1.mol"; + InputStream ins = this.getClass().getClassLoader().getResourceAsStream(filename); + IChemFormat format = new FormatFactory().guessFormat(ins); + Assert.assertEquals(format.getClass(),RGroupQueryFormat.class); + } + + /** + * Test parsing of RGFile rgfile.1.mol. + * Simple R-group query file. 
+ */ + @Test + public void testRgroupQueryFile1() throws Exception { + String filename = "data/mdl/rgfile.1.mol"; + logger.info("Testing: " + filename); + InputStream ins = this.getClass().getClassLoader().getResourceAsStream(filename); + RGroupQueryReader reader = new RGroupQueryReader(ins); + RGroupQuery rGroupQuery = (RGroupQuery)reader.read(new RGroupQuery()); + Assert.assertNotNull(rGroupQuery); + Assert.assertEquals(rGroupQuery.getRGroupDefinitions().size(), 1); + Assert.assertEquals(rGroupQuery.getRootStructure().getAtomCount(), 7); + + for (IAtom at : rGroupQuery.getAllRgroupQueryAtoms()) { + if (at instanceof PseudoAtom) { + Assert.assertEquals(((PseudoAtom)at).getLabel(), "R1"); + Map rootApo = rGroupQuery.getRootAttachmentPoints(); + Map apoBonds = (Map)rootApo.get(at); + Assert.assertEquals(apoBonds.size(), 1); + // Assert that the root attachment is the bond between R1 and P + for (IBond bond : rGroupQuery.getRootStructure().bonds()) { + if (bond.contains(at)) { + Assert.assertEquals(bond, apoBonds.get(1)); + for (IAtom atInApo : bond.atoms()) { + Assert.assertTrue(atInApo.getSymbol().equals("R") || atInApo.getSymbol().equals("P")); + } + } + } + } + } + + Iterator itr = rGroupQuery.getRGroupDefinitions().keySet().iterator(); + int val_1 = itr.next(); + Assert.assertEquals(val_1, 1); + RGroupList rList = rGroupQuery.getRGroupDefinitions().get(val_1); + Assert.assertEquals(rList.getOccurrence(), "0,1-3"); + + List rGroups = rList.getRGroups(); + Assert.assertEquals(rGroups.get(0).getFirstAttachmentPoint().getSymbol(), "N"); + Assert.assertEquals(rGroups.get(1).getFirstAttachmentPoint().getSymbol(), "O"); + Assert.assertEquals(rGroups.get(2).getFirstAttachmentPoint().getSymbol(), "S"); + + Assert.assertNull(rGroups.get(0).getSecondAttachmentPoint()); + Assert.assertNull(rGroups.get(1).getSecondAttachmentPoint()); + Assert.assertNull(rGroups.get(2).getSecondAttachmentPoint()); + + List configurations = rGroupQuery.getAllConfigurations(); + 
Assert.assertEquals(configurations.size(), 4); + + //RestH is set to true for R1, so with zero substitutes, the phosphor should get the restH flag set to true. + boolean restH_Identified=false; + for(IAtomContainer atc : configurations){ + if (atc.getAtomCount()==6) { + for (IAtom atom : atc.atoms() ) { + if (atom.getSymbol().equals("P")) { + Assert.assertNotNull(atom.getProperty(CDKConstants.REST_H)); + Assert.assertEquals(atom.getProperty(CDKConstants.REST_H),true); + restH_Identified=true; + } + } + } + } + Assert.assertTrue(restH_Identified); + } + + + /** + * Test parsing of RGFile rgfile.2.mol. + * More elaborate R-group query file. + */ + @Test + public void testRgroupQueryFile2() throws Exception { + String filename = "data/mdl/rgfile.2.mol"; + logger.info("Testing: " + filename); + InputStream ins = this.getClass().getClassLoader().getResourceAsStream(filename); + RGroupQueryReader reader = new RGroupQueryReader(ins); + RGroupQuery rGroupQuery = (RGroupQuery)reader.read(new RGroupQuery()); + Assert.assertNotNull(rGroupQuery); + Assert.assertEquals(rGroupQuery.getRGroupDefinitions().size(), 3); + Assert.assertEquals(rGroupQuery.getRootStructure().getAtomCount(), 14); + Assert.assertEquals(rGroupQuery.getRootAttachmentPoints().size(), 4); + + List rGroupQueryAtoms = rGroupQuery.getAllRgroupQueryAtoms(); + Assert.assertEquals(rGroupQueryAtoms.size(), 4); + + rGroupQueryAtoms = rGroupQuery.getRgroupQueryAtoms(1); + Assert.assertEquals(rGroupQueryAtoms.size(), 1); + + for (IAtom at : rGroupQuery.getAllRgroupQueryAtoms()) { + if (at instanceof PseudoAtom) { + Assert.assertTrue(RGroupQuery.isValidRgroupQueryLabel(((PseudoAtom)at).getLabel())); + int rgroupNum = new Integer((((PseudoAtom)at).getLabel()).substring(1)); + Assert.assertTrue(rgroupNum == 1 || rgroupNum == 2 || rgroupNum == 11); + switch (rgroupNum) { + case 1: + { + //Test: R1 has two attachment points, defined by AAL + Map rootApo = rGroupQuery.getRootAttachmentPoints(); + Map apoBonds = 
(Map)rootApo.get(at); + Assert.assertEquals(apoBonds.size(), 2); + Assert.assertEquals(apoBonds.get(1).getConnectedAtom(at).getSymbol(), "N"); + Assert.assertTrue(apoBonds.get(2).getConnectedAtom(at).getSymbol().equals("C")); + //Test: Oxygens are the 2nd APO's for R1 + RGroupList rList = rGroupQuery.getRGroupDefinitions().get(1); + Assert.assertEquals(rList.getRGroups().size(), 2); + List rGroups = rList.getRGroups(); + Assert.assertEquals(rGroups.get(0).getSecondAttachmentPoint().getSymbol(), "O"); + Assert.assertEquals(rGroups.get(1).getSecondAttachmentPoint().getSymbol(), "O"); + Assert.assertFalse(rList.isRestH()); + } + break; + case 2: + { + RGroupList rList = rGroupQuery.getRGroupDefinitions().get(2); + Assert.assertEquals(rList.getRGroups().size(), 2); + Assert.assertEquals(rList.getOccurrence(), "0,2"); + Assert.assertEquals(rList.getRequiredRGroupNumber(), 11); + Assert.assertFalse(rList.isRestH()); + } + break; + case 11: + { + RGroupList rList = rGroupQuery.getRGroupDefinitions().get(11); + Assert.assertEquals(rList.getRGroups().size(), 1); + Assert.assertEquals(rList.getRequiredRGroupNumber(), 0); + Assert.assertTrue(rList.isRestH()); + + List rGroups = rList.getRGroups(); + Assert.assertEquals(rGroups.get(0).getFirstAttachmentPoint().getSymbol(), "Pt"); + Assert.assertEquals(rGroups.get(0).getSecondAttachmentPoint(), null); + } + break; + } + } + } + + List configurations = rGroupQuery.getAllConfigurations(); + Assert.assertEquals(configurations.size(), 12); + + //Test restH values + int countRestHForSmallestConfigurations=0; + for(IAtomContainer atc : configurations){ + if (atc.getAtomCount()==13) { // smallest configuration + for (IAtom atom : atc.atoms() ) { + if (atom.getProperty(CDKConstants.REST_H)!=null) { + countRestHForSmallestConfigurations++; + if (atom.getSymbol().equals("P")) + Assert.assertEquals(atom.getProperty(CDKConstants.REST_H),true); + } + } + } + } + Assert.assertEquals(countRestHForSmallestConfigurations,6); + + } + + /** + * 
Test parsing of RGFile rgfile.3.mol. + * This R-group query has R1 bound double twice, and has AAL lines to parse. + */ + @Test + public void testRgroupQueryFile3() throws Exception { + String filename = "data/mdl/rgfile.3.mol"; + logger.info("Testing: " + filename); + InputStream ins = this.getClass().getClassLoader().getResourceAsStream(filename); + RGroupQueryReader reader = new RGroupQueryReader(ins); + RGroupQuery rGroupQuery = (RGroupQuery)reader.read(new RGroupQuery()); + Assert.assertNotNull(rGroupQuery); + Assert.assertEquals(rGroupQuery.getRGroupDefinitions().size(), 1); + Assert.assertEquals(rGroupQuery.getRootStructure().getAtomCount(), 10); + Assert.assertEquals(rGroupQuery.getRootAttachmentPoints().size(), 2); + + Assert.assertEquals(rGroupQuery.getAllConfigurations().size(), 8); + + //Test correctness of the AAL lines + for (IAtom at : rGroupQuery.getRgroupQueryAtoms(1)) { + if (at instanceof PseudoAtom) { + Assert.assertEquals(((PseudoAtom)at).getLabel(), "R1"); + + Map apoBonds = rGroupQuery.getRootAttachmentPoints().get(at); + Assert.assertEquals(apoBonds.size(), 2); + + IAtom boundAtom1 = apoBonds.get(1).getConnectedAtom(at); + Assert.assertTrue(boundAtom1.getSymbol().equals("Te") || boundAtom1.getSymbol().equals("S")); + + IAtom boundAtom2 = apoBonds.get(2).getConnectedAtom(at); + Assert.assertTrue(boundAtom2.getSymbol().equals("Po") || boundAtom2.getSymbol().equals("O")); + } + } + + // Test that there are only two Rgroup query atoms (R#). The third R is a + // pseudo atom, but because it is not numbered it is not part of any + // query condition. + List allrGroupQueryAtoms = rGroupQuery.getAllRgroupQueryAtoms(); + Assert.assertEquals(allrGroupQueryAtoms.size(), 2); + } + + /** + * Test parsing of RGFile rgfile.4.mol. + * This R-group query has its R# atom detached, no bonds. 
+ */ + @Test + public void testRgroupQueryFile4() throws Exception { + String filename = "data/mdl/rgfile.4.mol"; + logger.info("Testing: " + filename); + InputStream ins = this.getClass().getClassLoader().getResourceAsStream(filename); + RGroupQueryReader reader = new RGroupQueryReader(ins); + RGroupQuery rGroupQuery = (RGroupQuery)reader.read(new RGroupQuery()); + Assert.assertNotNull(rGroupQuery); + Assert.assertEquals(rGroupQuery.getRGroupDefinitions().size(), 1); + Assert.assertEquals(rGroupQuery.getRootStructure().getAtomCount(), 6); + + List allrGroupQueryAtoms = rGroupQuery.getAllRgroupQueryAtoms(); + Assert.assertEquals(allrGroupQueryAtoms.size(), 1); + RGroupList rList = rGroupQuery.getRGroupDefinitions().get(1); + Assert.assertEquals(rList.getRGroups().size(), 2); + Assert.assertEquals(rList.getRequiredRGroupNumber(), 0); + Assert.assertFalse(rList.isRestH()); + Assert.assertEquals(rGroupQuery.getRootAttachmentPoints().size(), 0); + Assert.assertTrue(rGroupQuery.areSubstituentsDefined()); + + Assert.assertEquals(rGroupQuery.getAllConfigurations().size(), 2); + + // This query has a detached R-group, so test for empty attachment points + List rGroups = rList.getRGroups(); + Assert.assertEquals(rGroups.get(0).getFirstAttachmentPoint(), null); + Assert.assertEquals(rGroups.get(0).getSecondAttachmentPoint(), null); + Assert.assertEquals(rGroups.get(1).getFirstAttachmentPoint(), null); + Assert.assertEquals(rGroups.get(1).getSecondAttachmentPoint(), null); + } + + + /** + * Test parsing of RGFile rgfile.5.mol. + * This exotic R-group query file has many R# groups and substitutes, + * to test mainly for getting all valid configurations. 
+ */ + @Test + public void testRgroupQueryFile5() throws Exception { + String filename = "data/mdl/rgfile.5.mol"; + logger.info("Testing: " + filename); + InputStream ins = this.getClass().getClassLoader().getResourceAsStream(filename); + RGroupQueryReader reader = new RGroupQueryReader(ins); + RGroupQuery rGroupQuery = (RGroupQuery)reader.read(new RGroupQuery()); + Assert.assertNotNull(rGroupQuery); + Assert.assertEquals(rGroupQuery.getRGroupDefinitions().size(), 4); + + //Test combinatorial explosion: R5 has many different configurations + Assert.assertEquals(rGroupQuery.getAllConfigurations().size(), 17820); + } + + /** + * Test parsing of RGFile rgfile.6.mol. + * This RGFile is incomplete, RGP lines are missing. We still want to + * accept it (Symyx/ChemAxon software accepts it too). + */ + @Test (expected=CDKException.class) + public void testRgroupQueryFile6() throws Exception { + String filename = "data/mdl/rgfile.6.mol"; + logger.info("Testing: " + filename); + InputStream ins = this.getClass().getClassLoader().getResourceAsStream(filename); + RGroupQueryReader reader = new RGroupQueryReader(ins); + RGroupQuery rGroupQuery = (RGroupQuery)reader.read(new RGroupQuery()); + Assert.assertNotNull(rGroupQuery); + Assert.assertEquals(rGroupQuery.getRGroupDefinitions().size(), 3); + Assert.assertEquals(rGroupQuery.getRootStructure().getAtomCount(), 14); + + // This file has missing $RGP blocks. You could argue that this is + // thus not a legal query (ie missing query specifications) + Assert.assertFalse(rGroupQuery.areSubstituentsDefined()); + + //Getting for all configurations won't happen, because not all groups were set + rGroupQuery.getAllConfigurations(); // Will raise exception + + } + + /** + * Test parsing of RGFile rgfile.7.mol. + * This RGFile has APO lines with value 3: both attachment points.

+ * + * Also, R32 appears twice, but with different numbers of attachment. + * The parser should not trip over this, and make nice configurations. + */ + @Test + public void testRgroupQueryFile7() throws Exception { + String filename = "data/mdl/rgfile.7.mol"; + logger.info("Testing: " + filename); + InputStream ins = this.getClass().getClassLoader().getResourceAsStream(filename); + RGroupQueryReader reader = new RGroupQueryReader(ins); + RGroupQuery rGroupQuery = (RGroupQuery)reader.read(new RGroupQuery()); + Assert.assertNotNull(rGroupQuery); + Assert.assertEquals(rGroupQuery.getRGroupDefinitions().size(), 1); + Assert.assertEquals(rGroupQuery.getRootStructure().getAtomCount(), 9); + Assert.assertEquals(rGroupQuery.getAllConfigurations().size(), 20); + + } + +} diff --git a/src/test/org/openscience/cdk/io/RGroupQueryWriterTest.java b/src/test/org/openscience/cdk/io/RGroupQueryWriterTest.java new file mode 100644 index 00000000000..f232a53348b --- /dev/null +++ b/src/test/org/openscience/cdk/io/RGroupQueryWriterTest.java @@ -0,0 +1,156 @@ +/* + * Copyright (C) 2010 Mark Rijnbeek + * Contact: cdk-devel@lists.sourceforge.net + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * All we ask is that proper credit is given for our work, which includes + * - but is not limited to - adding the above copyright notice to the beginning + * of your source code files, and to any copyright notice that you may + * distribute with programs based on this work. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + */ +package org.openscience.cdk.io; + +import java.io.InputStream; +import java.io.StringWriter; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; +import org.openscience.cdk.DefaultChemObjectBuilder; +import org.openscience.cdk.exception.CDKException; +import org.openscience.cdk.interfaces.IChemObjectBuilder; +import org.openscience.cdk.isomorphism.matchers.RGroupQuery; + +/** + * JUnit tests for {@link org.openscience.cdk.io.RGroupQueryWriter}. + * Idea: read the test RGfiles into an object model, then writes the + * same model out as an RGfile again without changing anything. Then + * check that the original inputfile and the outputfile have the same content. + * + * @cdk.module test-io + * @author Mark Rijnbeek + */ +public class RGroupQueryWriterTest extends ChemObjectIOTest { + + private static IChemObjectBuilder builder; + + @BeforeClass public static void setup() { + builder = DefaultChemObjectBuilder.getInstance(); + setChemObjectIO(new RGroupQueryWriter()); + } + + @Test + public void testRgroupQueryFile_1() throws Exception { + String rgFile =recreate("data/mdl/rgfile.1.mol"); + //System.out.println(rgFile); + + Assert.assertEquals("AAL lines", 0, countSubstring("AAL",rgFile)); + Assert.assertEquals("LOG lines", 1, countSubstring("LOG",rgFile)); + Assert.assertEquals("APO lines", 3, countSubstring("APO",rgFile)); + Assert.assertTrue (rgFile.contains("M LOG 1 1 0 1 0,1-3")); + Assert.assertEquals("Total #lines", 59, countSubstring("\n",rgFile)); + } + + @Test + public void testRgroupQueryFile_2() throws Exception { + String rgFile =recreate("data/mdl/rgfile.2.mol"); + //System.out.println(rgFile); + + Assert.assertEquals("AAL lines", 
1, countSubstring("AAL",rgFile)); + Assert.assertEquals("LOG lines", 3, countSubstring("LOG",rgFile)); + Assert.assertEquals("APO lines", 5, countSubstring("APO",rgFile)); + Assert.assertTrue (rgFile.contains("M RGP 4 1 11 2 2 3 2 4 1")); + Assert.assertEquals("Total #lines", 107, countSubstring("\n",rgFile)); + } + + @Test + public void testRgroupQueryFile_3() throws Exception { + String rgFile =recreate("data/mdl/rgfile.3.mol"); + Assert.assertEquals("AAL lines", 2, countSubstring("AAL",rgFile)); + Assert.assertEquals("LOG lines", 1, countSubstring("LOG",rgFile)); + Assert.assertEquals("APO lines", 2, countSubstring("APO",rgFile)); + Assert.assertEquals("Total #lines", 66, countSubstring("\n",rgFile)); + Assert.assertTrue (rgFile.contains("M RGP 2 5 1 7 1")); + } + + @Test + public void testRgroupQueryFile_4() throws Exception { + String rgFile =recreate("data/mdl/rgfile.4.mol"); + Assert.assertEquals("AAL lines", 0, countSubstring("AAL",rgFile)); + Assert.assertEquals("\\$CTAB lines", 3, countSubstring("\\$CTAB",rgFile)); + // the R-group is detached, we don't write APO lines (unlike the 0 value APO in the input file) + Assert.assertEquals("APO lines", 0, countSubstring("APO",rgFile)); + Assert.assertEquals("Total #lines", 46, countSubstring("\n",rgFile)); + Assert.assertTrue (rgFile.contains("M RGP 1 6 1")); + } + + @Test + public void testRgroupQueryFile_5() throws Exception { + String rgFile =recreate("data/mdl/rgfile.5.mol"); + Assert.assertEquals("LOG lines", 4, countSubstring("LOG",rgFile)); + Assert.assertEquals("APO lines", 0, countSubstring("APO",rgFile)); + Assert.assertEquals("M RGP lines", 2, countSubstring("M RGP",rgFile)); //overflow + Assert.assertEquals("Total #lines", 132, countSubstring("\n",rgFile)); + } + + @Test + public void testRgroupQueryFile_6() throws Exception { + String rgFile =recreate("data/mdl/rgfile.6.mol"); + System.out.println(rgFile); + Assert.assertEquals("AAL lines", 1, countSubstring("AAL",rgFile)); + 
Assert.assertEquals("LOG lines", 3, countSubstring("LOG",rgFile)); + Assert.assertEquals("APO lines", 1, countSubstring("APO",rgFile)); + Assert.assertEquals("Total #lines", 57, countSubstring("\n",rgFile)); + } + + @Test + public void testRgroupQueryFile_7() throws Exception { + String rgFile =recreate("data/mdl/rgfile.7.mol"); + System.out.println(rgFile); + Assert.assertEquals("LOG lines", 1, countSubstring("LOG",rgFile)); + Assert.assertEquals("APO lines", 2, countSubstring("APO",rgFile)); + Assert.assertTrue (rgFile.contains("M RGP 3 4 32 6 32 7 32")); + Assert.assertEquals("Total #lines", 53, countSubstring("\n",rgFile)); + } + + private int countSubstring (String regExp,String text) { + Pattern p = Pattern.compile(regExp); + Matcher m = p.matcher(text); // get a matcher object + int count = 0; + while(m.find()) + count++; + return count; + } + + public void testAcceptsAtLeastOneDebugObject() {} + public void testAcceptsAtLeastOneNonotifyObject() {} + + private String recreate(String file) throws CDKException { + StringWriter sw = new StringWriter(); + RGroupQueryWriter rgw = new RGroupQueryWriter (sw); + InputStream ins = this.getClass().getClassLoader().getResourceAsStream(file); + RGroupQueryReader reader = new RGroupQueryReader(ins); + RGroupQuery rGroupQuery = (RGroupQuery)reader.read(new RGroupQuery()); + rgw.write(rGroupQuery); + String out = sw.toString(); + return out; + + } + + +} diff --git a/src/test/org/openscience/cdk/io/formats/RGroupQueryFormatTest.java b/src/test/org/openscience/cdk/io/formats/RGroupQueryFormatTest.java new file mode 100644 index 00000000000..e2c52ca57d4 --- /dev/null +++ b/src/test/org/openscience/cdk/io/formats/RGroupQueryFormatTest.java @@ -0,0 +1,34 @@ +/* + * Copyright (C) 2010 Mark Rijnbeek + * + * Contact: cdk-devel@lists.sourceforge.net + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software 
Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * All we ask is that proper credit is given for our work, which includes + * - but is not limited to - adding the above copyright notice to the beginning + * of your source code files, and to any copyright notice that you may + * distribute with programs based on this work. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + */ +package org.openscience.cdk.io.formats; + +/** + * @cdk.module test-ioformats + */ +public class RGroupQueryFormatTest extends ChemFormatMatcherTest { + public RGroupQueryFormatTest() { + super.setChemFormatMatcher((IChemFormatMatcher)RGroupQueryFormat.getInstance()); + } +} diff --git a/src/test/org/openscience/cdk/isomorphism/matchers/RGroupListTest.java b/src/test/org/openscience/cdk/isomorphism/matchers/RGroupListTest.java new file mode 100644 index 00000000000..bae754db92f --- /dev/null +++ b/src/test/org/openscience/cdk/isomorphism/matchers/RGroupListTest.java @@ -0,0 +1,80 @@ +/* + * Copyright (C) 2010 Mark Rijnbeek + * + * Contact: cdk-devel@lists.sourceforge.net + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. 
+ * All we ask is that proper credit is given for our work, which includes + * - but is not limited to - adding the above copyright notice to the beginning + * of your source code files, and to any copyright notice that you may + * distribute with programs based on this work. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + */ +package org.openscience.cdk.isomorphism.matchers; + +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; + +import org.openscience.cdk.CDKTestCase; +import org.openscience.cdk.exception.CDKException; + + +/** + * Checks the functionality of the {@link org.openscience.cdk.isomorphism.matchers.RGroupList}, + * in particular setting valid 'occurrence' strings. 
+ * + * @cdk.module test-extra + */ +public class RGroupListTest extends CDKTestCase { + + @BeforeClass + public static void setUp() { + } + + @Test + public void testOccurrenceCorrect() throws CDKException { + RGroupList rgrLst = new RGroupList(1); + rgrLst.setOccurrence("1, 3-7, 9, >11"); + Assert.assertEquals(rgrLst.getOccurrence(), "1,3-7,9,>11"); + } + + @Test + public void testOccurrenceNull() throws CDKException{ + RGroupList rgrLst = new RGroupList(1); + rgrLst.setOccurrence(null); + Assert.assertEquals(rgrLst.getOccurrence(), RGroupList.DEFAULT_OCCURRENCE); + } + + @Test (expected = CDKException.class) + public void testOccurrenceNumericValues() throws CDKException{ + RGroupList rgrLst = new RGroupList(1); + rgrLst.setOccurrence("a,3,10"); + } + + @Test (expected = CDKException.class) + public void testOccurrenceNoNegativeNumber() throws CDKException{ + RGroupList rgrLst = new RGroupList(1); + rgrLst.setOccurrence("-10"); + } + + @Test (expected = CDKException.class) + public void testOccurrenceNotSmallerThanZero() throws CDKException{ + RGroupList rgrLst = new RGroupList(1); + rgrLst.setOccurrence("<0"); + } + + +}