+ *
+ * Contact: cdk-devel@lists.sourceforge.net
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ * All we ask is that proper credit is given for our work, which includes
+ * - but is not limited to - adding the above copyright notice to the beginning
+ * of your source code files, and to any copyright notice that you may
+ * distribute with programs based on this work.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+package org.openscience.cdk.io;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.Reader;
+import java.io.StringReader;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.StringTokenizer;
+
+import org.openscience.cdk.annotations.TestClass;
+import org.openscience.cdk.annotations.TestMethod;
+import org.openscience.cdk.exception.CDKException;
+import org.openscience.cdk.interfaces.IAtom;
+import org.openscience.cdk.interfaces.IBond;
+import org.openscience.cdk.interfaces.IChemObject;
+import org.openscience.cdk.interfaces.IMolecule;
+import org.openscience.cdk.interfaces.IPseudoAtom;
+import org.openscience.cdk.io.formats.IResourceFormat;
+import org.openscience.cdk.io.formats.RGroupQueryFormat;
+import org.openscience.cdk.isomorphism.matchers.IRGroupQuery;
+import org.openscience.cdk.isomorphism.matchers.RGroup;
+import org.openscience.cdk.isomorphism.matchers.RGroupList;
+import org.openscience.cdk.isomorphism.matchers.RGroupQuery;
+import org.openscience.cdk.tools.ILoggingTool;
+import org.openscience.cdk.tools.LoggingToolFactory;
+
+/**
+ * A reader for Symyx' Rgroup files (RGFiles).
+ * An RGfile describes a single molecular query with Rgroups.
+ * Each RGfile is a combination of Ctabs defining the root molecule and each
+ * member of each Rgroup in the query.
+ *
+ * The RGFile format is described in the manual
+ *
+ * "CTFile Formats" , Chapter 5.
+ *
+ * @cdk.module io
+ * @cdk.githash
+ *
+ * @cdk.keyword Rgroup
+ * @cdk.keyword R group
+ * @cdk.keyword R-group
+ * @author Mark Rijnbeek
+ */
+@TestClass("org.openscience.cdk.io.RGroupQueryReaderTest")
+public class RGroupQueryReader extends DefaultChemObjectReader {
+
+ /**
+  * Private bean-style holder for the values read from one LOG (logic)
+  * line of the root CTAB. It is a static nested class because it never
+  * touches the enclosing reader instance.
+  */
+ private static class RGroupLogic {
+     /** Second numeric field of the R-group's LOG entry (the token after the R-group number). */
+     int rgoupNumberRequired;
+     /** True when the RestH token of the LOG entry equals "1". */
+     boolean restH;
+     /** All remaining LOG tokens, concatenated without separators. */
+     String occurence;
+ }
+
+ // Buffered source the RGFile is read from; assigned by the Reader constructor and by setReader(Reader).
+ BufferedReader input = null;
+ // Logging tool created for this class and shared by all instances.
+ private static ILoggingTool logger = LoggingToolFactory.createLoggingTool(RGroupQueryReader.class);
+
+ /**
+  * Creates a reader over an empty string. Supply the real input
+  * afterwards through one of the {@code setReader} methods.
+  */
+ public RGroupQueryReader() {
+     this(new StringReader(""));
+ }
+
+ /**
+  * Creates a reader that consumes RGFile content from a byte stream.
+  * The stream is wrapped in an {@link InputStreamReader} created without
+  * an explicit charset.
+  * @param in the InputStream to read from.
+  */
+ public RGroupQueryReader(InputStream in) {
+     this(new InputStreamReader(in));
+ }
+
+ /**
+  * Creates a reader that consumes RGFile content from a character stream.
+  * A {@link BufferedReader} is used as is; any other Reader is wrapped in
+  * one, mirroring what {@code setReader(Reader)} does.
+  * @param in the Reader to read from.
+  */
+ public RGroupQueryReader(Reader in) {
+     if (in instanceof BufferedReader) {
+         // already buffered: avoid stacking a second buffer on top
+         input = (BufferedReader) in;
+     } else {
+         input = new BufferedReader(in);
+     }
+ }
+
+
+ /**
+  * Replaces the input this reader consumes. A {@link BufferedReader} is
+  * adopted directly, any other Reader is wrapped in a new one.
+  * @param input Reader object
+  * @throws CDKException declared by the signature; not thrown by this implementation
+  */
+ @TestMethod("testSetReader_Reader")
+ public void setReader(Reader input) throws CDKException {
+     this.input = (input instanceof BufferedReader)
+             ? (BufferedReader) input
+             : new BufferedReader(input);
+ }
+
+ @TestMethod("testSetReader_InputStream")
+ public void setReader(InputStream input) throws CDKException {
+ setReader(new InputStreamReader(input));
+ }
+
+ @TestMethod("testGetFormat")
+ public IResourceFormat getFormat() {
+ return RGroupQueryFormat.getInstance();
+ }
+
+ /**
+  * Tells whether the given class is an {@link IRGroupQuery}: the interface
+  * itself, a sub-interface, or any class implementing it directly or
+  * through a superclass or super-interface.
+  * @param classObject class to test; {@code null} is rejected
+  * @return true when instances of the class are IRGroupQuery objects
+  */
+ @TestMethod("testAccepts")
+ public boolean accepts(Class classObject) {
+     if (classObject == null) {
+         return false;
+     }
+     // isAssignableFrom walks the whole type hierarchy, including inherited
+     // interfaces, and also matches IRGroupQuery.class itself.
+     return IRGroupQuery.class.isAssignableFrom(classObject);
+ }
+
+ /**
+  * Closes the underlying buffered input.
+  * @throws IOException when closing the input fails
+  */
+ @TestMethod("testClose")
+ public void close() throws IOException {
+     this.input.close();
+ }
+
+ /**
+ * Check input IChemObject and proceed to parse.
+ * Accepts/returns IChemObject of type RGroupQuery only.
+ * @return IChemObject read from file
+ * @param object class must be of type RGroupQuery
+ */
+ public IChemObject read(IChemObject object) throws CDKException {
+ if (object instanceof RGroupQuery) {
+ return parseRGFile((RGroupQuery)object);
+ } else {
+ throw new CDKException
+ ("Reader only supports "+RGroupQuery.class.getName()+" objects");
+ }
+ }
+
+
+ /**
+ * Parse the RGFile. Uses of {@link org.openscience.cdk.io.MDLV2000Reader}
+ * to parse individual $CTAB blocks.
+ *
+ * @param rGroupQuery empty
+ * @return populated query
+ * @throws CDKException
+ */
+ private RGroupQuery parseRGFile(RGroupQuery rGroupQuery) throws CDKException {
+ String line = "";
+ int lineCount = 0;
+ String eol = System.getProperty("line.separator");
+ StringTokenizer strTk=null;
+ /* Variable to capture the LOG line(s) */
+ Map logicDefinitions = new HashMap();
+
+ /* Variable to captures attachment order for Rgroups.
+ * Contains:
+ * - pseudo atom (Rgroup)
+ * - map with (integer,bond) meaning "bond" has attachment
+ * order "integer" (1,2,3) for the Rgroup
+ * The order is based on the atom block, unless there is an AAL line
+ * for the pseudo atom.
+ */
+ Map> attachmentPoints = new HashMap>();
+
+
+ try {
+ // Process the Header block_________________________________________
+ //__________________________________________________________________
+ logger.info("Process the Header block");
+ checkLineBeginsWith(input.readLine(), "$MDL", ++lineCount);
+ checkLineBeginsWith(input.readLine(), "$MOL", ++lineCount);
+ checkLineBeginsWith(input.readLine(), "$HDR", ++lineCount);
+
+ for (int i = 1; i <= 3; i++) {
+ lineCount++;
+ if (input.readLine() == null) {
+ throw new CDKException("RGFile invalid, empty/null header line at #" + lineCount);
+ }
+ //optional: parse header info here (not implemented)
+ }
+ checkLineBeginsWith(input.readLine(), "$END HDR", ++lineCount);
+
+
+ //Process the root structure (scaffold)_____________________________
+ //__________________________________________________________________
+ logger.info("Process the root structure (scaffold)");
+ checkLineBeginsWith(input.readLine(), "$CTAB", ++lineCount);
+ //Force header
+ StringBuilder sb = new StringBuilder(RGroup.ROOT_LABEL+"\n\n\n");
+ line = input.readLine();
+ ++lineCount;
+ while (line != null && !line.equals("$END CTAB")) {
+ sb.append(line + eol);
+
+ //LOG lines: Logic, Unsatisfied Sites, Range of Occurrence.
+ if (line.startsWith("M LOG")) {
+ strTk = new StringTokenizer(line);
+ strTk.nextToken();
+ strTk.nextToken();
+ strTk.nextToken();
+ RGroupLogic log = null;
+
+ log = new RGroupLogic();
+ int rgroupNumber = new Integer(strTk.nextToken());
+ String tok = strTk.nextToken();
+ log.rgoupNumberRequired = tok.equals("0") ? 0 : new Integer(tok);
+ log.restH = strTk.nextToken().equals("1") ? true : false;
+ tok = "";
+ while (strTk.hasMoreTokens()) {
+ tok += strTk.nextToken();
+ }
+ log.occurence = tok;
+ logicDefinitions.put(rgroupNumber, log);
+ }
+
+ line = input.readLine();
+ ++lineCount;
+ }
+ String rootStr = sb.toString();
+
+ //Let MDL reader process $CTAB block of the root structure.
+ MDLV2000Reader reader = new MDLV2000Reader(new StringReader(rootStr), ISimpleChemObjectReader.Mode.STRICT);
+ IMolecule root = (IMolecule)reader.read(rGroupQuery.getBuilder().newMolecule());
+ rGroupQuery.setRootStructure(root);
+ List atomsByLinePosition = reader.getAtomsByLinePosition();
+
+ //Atom attachment order: parse AAL lines first
+ strTk = new StringTokenizer(rootStr, eol);
+ while (strTk.hasMoreTokens()) {
+ line = strTk.nextToken();
+ if (line.startsWith("M AAL")) {
+ StringTokenizer stAAL = new StringTokenizer(line);
+ stAAL.nextToken();
+ stAAL.nextToken();
+ int pos = new Integer(stAAL.nextToken());
+ IAtom rGroup = atomsByLinePosition.get(pos);
+ stAAL.nextToken();
+ Map bondMap = new HashMap();
+ while (stAAL.hasMoreTokens()) {
+ pos = new Integer(stAAL.nextToken());
+ IAtom partner = atomsByLinePosition.get(pos);
+ IBond bond = root.getBond(rGroup, partner);
+ int order = new Integer(stAAL.nextToken());
+ bondMap.put(order, bond);
+ logger.info("AAL " + order + " " + ((IPseudoAtom)rGroup).getLabel() +
+ "-" + partner.getSymbol());
+ }
+ if (bondMap.size()!=0) {
+ attachmentPoints.put(rGroup, bondMap);
+ }
+
+ }
+ }
+ //Deal with remaining attachment points (non AAL)
+ for (IAtom atom : root.atoms()) {
+ if (atom instanceof IPseudoAtom) {
+ IPseudoAtom rGroup = (IPseudoAtom)atom;
+ if (rGroup.getLabel().startsWith("R") &&
+ !rGroup.getLabel().equals("R") && // only numbered ones
+ !attachmentPoints.containsKey(rGroup)) {
+ //Order reflects the order of atoms in the Atom Block
+ int order = 0;
+ Map bondMap = new HashMap();
+ for (IAtom atom2 : atomsByLinePosition) {
+ if (!atom.equals(atom2)) {
+ for (IBond bond : root.bonds()) {
+ if (bond.contains(atom) && bond.contains(atom2)) {
+ bondMap.put(++order, bond);
+ logger.info("Def " + order + " " + rGroup.getLabel() + "-" +
+ atom2.getSymbol());
+ break;
+ }
+ }
+ }
+ }
+ if (bondMap.size()!=0) {
+ attachmentPoints.put(rGroup, bondMap);
+ }
+ }
+ }
+ }
+ //Done with attachment points
+ rGroupQuery.setRootAttachmentPoints(attachmentPoints);
+ logger.info("Attachm.points defined for " + attachmentPoints.size() + " R# atoms");
+
+
+ //Process each Rgroup's $CTAB block(s)_____________________________
+ //__________________________________________________________________
+
+ //Set up the RgroupLists, one for each unique R# (# = 1..32 max)
+ Map rGroupDefinitions = new HashMap();
+
+ for (IAtom atom : root.atoms()) {
+ if (atom instanceof IPseudoAtom) {
+ IPseudoAtom rGroup = (IPseudoAtom)atom;
+ if (RGroupQuery.isValidRgroupQueryLabel(rGroup.getLabel())) {
+ int rgroupNum = new Integer(rGroup.getLabel().substring(1));
+ RGroupList rgroupList = new RGroupList(rgroupNum);
+ if (!rGroupDefinitions.containsKey(rgroupNum)) {
+ logger.info("Define Rgroup R" + rgroupNum);
+ RGroupLogic logic = logicDefinitions.get(rgroupNum);
+ if (logic != null) {
+ rgroupList.setRestH(logic.restH);
+ rgroupList.setOccurrence(logic.occurence);
+ rgroupList.setRequiredRGroupNumber(logic.rgoupNumberRequired);
+ } else {
+ rgroupList.setRestH(false);
+ rgroupList.setOccurrence(">0");
+ rgroupList.setRequiredRGroupNumber(0);
+ }
+ rgroupList.setRGroups(new ArrayList());
+ rGroupDefinitions.put(rgroupNum, rgroupList);
+ }
+ }
+ }
+ }
+
+ //Parse all $CTAB blocks per Rgroup (there can be more than one)
+ line = input.readLine();
+ ++lineCount;
+ boolean hasMoreRGP = true;
+ while (hasMoreRGP) {
+
+ checkLineBeginsWith(line, "$RGP", lineCount);
+ line = input.readLine();
+ ++lineCount;
+ logger.info("line for num is " + line);
+ int rgroupNum = new Integer(line.trim());
+ line = input.readLine();
+ ++lineCount;
+
+ boolean hasMoreCTAB = true;
+ while (hasMoreCTAB) {
+
+ checkLineBeginsWith(line, "$CTAB", lineCount);
+ sb = new StringBuilder(RGroup.makeLabel(rgroupNum)+"\n\n\n");
+ line = input.readLine();
+ while (line != null && !line.startsWith("$END CTAB")) {
+ sb.append(line + eol);
+ line = input.readLine();
+ ++lineCount;
+ }
+ String groupStr = sb.toString();
+ reader = new MDLV2000Reader
+ (new StringReader(groupStr), ISimpleChemObjectReader.Mode.STRICT);
+ IMolecule group = (IMolecule)reader.read(rGroupQuery.getBuilder().newMolecule());
+ atomsByLinePosition = reader.getAtomsByLinePosition();
+ RGroup rGroup = new RGroup();
+ rGroup.setGroup(group);
+
+ //Parse the Rgroup's attachment points (APO)
+ strTk = new StringTokenizer(groupStr, eol);
+ while (strTk.hasMoreTokens()) {
+ line = strTk.nextToken();
+ if (line.startsWith("M APO")) {
+ StringTokenizer stAPO = new StringTokenizer(line);
+ stAPO.nextToken();
+ stAPO.nextToken();
+ stAPO.nextToken();
+ while (stAPO.hasMoreTokens()) {
+ int pos = new Integer(stAPO.nextToken());
+ int apo = new Integer(stAPO.nextToken());
+ IAtom at = atomsByLinePosition.get(pos);
+ switch (apo) {
+ case 1:
+ rGroup.setFirstAttachmentPoint(at);
+ break;
+ case 2:
+ rGroup.setSecondAttachmentPoint(at);
+ break;
+ case 3: {
+ rGroup.setFirstAttachmentPoint(at);
+ rGroup.setSecondAttachmentPoint(at);
+ }
+ break;
+ }
+ }
+ }
+ }
+ RGroupList rList = rGroupDefinitions.get(rgroupNum);
+ if (rList==null) {
+ throw new CDKException("R"+rgroupNum+" not defined but referenced in $RGP.");
+ }
+ else {
+ rList.getRGroups().add(rGroup);
+ }
+ line = input.readLine();
+ ++lineCount;
+ if (line.startsWith("$END RGP")) {
+ logger.info("end of RGP block");
+ hasMoreCTAB = false;
+ }
+ }
+
+ line = input.readLine();
+ ++lineCount;
+ if (line.startsWith("$END MOL")) {
+ hasMoreRGP = false;
+ }
+ }
+
+ rGroupQuery.setRGroupDefinitions(rGroupDefinitions);
+ logger.info("Number of lines was " + lineCount);
+ return rGroupQuery;
+
+ } catch (CDKException exception) {
+ String error = "CDK Error while parsing line " + lineCount + ": " + line + " -> " + exception.getMessage();
+ logger.error(error);
+ logger.debug(exception);
+ throw exception;
+ } catch (Exception exception) {
+ exception.printStackTrace();
+ String error =
+ exception.getClass() + "Error while parsing line " + lineCount + ": " + line + " -> " + exception.getMessage();
+ logger.error(error);
+ logger.debug(exception);
+ throw new CDKException(error, exception);
+ }
+ }
+
+ /**
+  * Verifies that a line read from the RGFile exists and starts with the
+  * expected text.
+  * @param line line that was read, {@code null} at end of input
+  * @param expect prefix the line has to start with
+  * @param lineCount number of the line, used in the error message
+  * @throws CDKException when the line is null or has a different prefix
+  */
+ private void checkLineBeginsWith(String line, String expect, int lineCount) throws CDKException {
+     String problem = null;
+     if (line == null) {
+         problem = "RGFile invalid, empty/null line at #" + lineCount;
+     } else if (!line.startsWith(expect)) {
+         problem = "RGFile invalid, line #" + lineCount + " should start with:" + expect + ".";
+     }
+     if (problem != null) {
+         throw new CDKException(problem);
+     }
+ }
+
+
+}
+
+
diff --git a/src/main/org/openscience/cdk/io/RGroupQueryWriter.java b/src/main/org/openscience/cdk/io/RGroupQueryWriter.java
new file mode 100644
index 00000000000..266b097d297
--- /dev/null
+++ b/src/main/org/openscience/cdk/io/RGroupQueryWriter.java
@@ -0,0 +1,322 @@
+/*
+ * Copyright (C) 2010 Mark Rijnbeek
+ *
+ * Contact: cdk-devel@lists.sourceforge.net
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ * All we ask is that proper credit is given for our work, which includes
+ * - but is not limited to - adding the above copyright notice to the beginning
+ * of your source code files, and to any copyright notice that you may
+ * distribute with programs based on this work.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+package org.openscience.cdk.io;
+
+import java.io.BufferedWriter;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.OutputStreamWriter;
+import java.io.StringWriter;
+import java.io.Writer;
+import java.text.SimpleDateFormat;
+import java.util.List;
+import java.util.Map;
+
+import org.openscience.cdk.annotations.TestMethod;
+import org.openscience.cdk.exception.CDKException;
+import org.openscience.cdk.interfaces.IAtom;
+import org.openscience.cdk.interfaces.IAtomContainer;
+import org.openscience.cdk.interfaces.IBond;
+import org.openscience.cdk.interfaces.IChemObject;
+import org.openscience.cdk.io.formats.IResourceFormat;
+import org.openscience.cdk.io.formats.RGroupQueryFormat;
+import org.openscience.cdk.isomorphism.matchers.IRGroupQuery;
+import org.openscience.cdk.isomorphism.matchers.RGroup;
+import org.openscience.cdk.isomorphism.matchers.RGroupList;
+
+/**
+ * A writer for Symyx' Rgroup files (RGFiles).
+ * An RGfile describes a single molecular query with Rgroups.
+ * Each RGfile is a combination of Ctabs defining the root molecule and each
+ * member of each Rgroup in the query.
+ *
+ * This class relies on the {@link org.openscience.cdk.io.MDLWriter} to
+ * create CTAB data blocks.
+ *
+ * @cdk.module io
+ * @cdk.githash
+ * @cdk.keyword Rgroup
+ * @cdk.keyword R group
+ * @cdk.keyword R-group
+ * @author Mark Rijnbeek
+ */
+
+public class RGroupQueryWriter extends DefaultChemObjectWriter {
+
+ // Buffered destination of the RGFile output; assigned by the constructor and by setWriter(Writer).
+ private BufferedWriter writer;
+ // Platform line separator, read once from the "line.separator" system property.
+ private static String LSEP = System.getProperty("line.separator");
+
+ /**
+  * Creates a writer that emits an {@link IRGroupQuery} in the Symyx
+  * RGFile format. A {@link BufferedWriter} is adopted directly, any
+  * other Writer is wrapped in a new one.
+  *
+  * @param out the Writer to write to
+  */
+ public RGroupQueryWriter(Writer out) {
+     writer = (out instanceof BufferedWriter)
+             ? (BufferedWriter) out
+             : new BufferedWriter(out);
+ }
+
+ /**
+  * Zero argument constructor; output goes to a fresh {@link StringWriter}
+  * until another destination is set through {@code setWriter}.
+  */
+ public RGroupQueryWriter() {
+     this(new StringWriter());
+ }
+
+ /**
+  * Returns true for accepted input types: {@link IRGroupQuery} itself, a
+  * sub-interface, or any class implementing it directly or through a
+  * superclass or super-interface.
+  * @param classObject class to test; {@code null} is rejected
+  * @return true when instances of the class are IRGroupQuery objects
+  */
+ @SuppressWarnings("unchecked")
+ @TestMethod("testAccepts")
+ public boolean accepts(Class classObject) {
+     if (classObject == null) {
+         return false;
+     }
+     // isAssignableFrom walks the whole type hierarchy, including inherited
+     // interfaces, and also matches IRGroupQuery.class itself.
+     return IRGroupQuery.class.isAssignableFrom(classObject);
+ }
+
+ /**
+  * Closes the underlying buffered writer.
+  * @throws IOException when closing the writer fails
+  */
+ @TestMethod("testClose")
+ public void close() throws IOException {
+     this.writer.close();
+ }
+
+ /**
+  * Produces a CTAB block for an atomContainer, without the header lines.
+  * The container is written with an {@link MDLWriter} and the first three
+  * lines of that output are removed.
+  * @param atomContainer structure to serialise
+  * @return CTAB block
+  * @throws CDKException propagated from the MDLWriter
+  */
+ private String getCTAB (IAtomContainer atomContainer) throws CDKException {
+     StringWriter strWriter = new StringWriter();
+     MDLWriter mdlWriter = new MDLWriter(strWriter);
+     mdlWriter.write(atomContainer);
+     String ctab = strWriter.toString();
+     //strip off the individual header, as we have one super header instead.
+     for (int line = 1; line <= 3; line++) {
+         int sepAt = ctab.indexOf(LSEP);
+         if (sepAt < 0) {
+             break; // fewer lines than expected: nothing left to strip
+         }
+         // skip the whole separator, which may be longer than one character
+         ctab = ctab.substring(sepAt + LSEP.length());
+     }
+     return ctab;
+ }
+
+ /**
+ * Returns output format.
+ */
+ @TestMethod("testGetFormat")
+ public IResourceFormat getFormat() {
+ return RGroupQueryFormat.getInstance();
+ }
+
+ /**
+ * Sets the writer to given output stream.
+ */
+ public void setWriter(OutputStream output) throws CDKException {
+ setWriter(new OutputStreamWriter(output));
+ }
+
+ /**
+  * Sets the writer. A {@link BufferedWriter} is adopted directly, any
+  * other Writer is wrapped in a new one.
+  * @param out destination for the output
+  * @throws CDKException declared by the signature; not thrown by this implementation
+  */
+ public void setWriter(Writer out) throws CDKException {
+     writer = (out instanceof BufferedWriter)
+             ? (BufferedWriter) out
+             : new BufferedWriter(out);
+ }
+
+ /**
+ * The actual writing of the output: builds the root block (header, root
+ * CTAB, LOG lines) and the $RGP blocks in memory, then writes both to the
+ * underlying writer and flushes it.
+ * @param object the query to write; must be an IRGroupQuery
+ * @throws CDKException when the object is not an IRGroupQuery, or when an
+ * IOException occurs while writing (only its message is carried over)
+ */
+ public void write(IChemObject object) throws CDKException {
+ if (!(object instanceof IRGroupQuery)) {
+ throw new CDKException("Only IRGroupQuery input is accepted.");
+ }
+ try {
+
+ IRGroupQuery rGroupQuery = (IRGroupQuery) object;
+ // timestamp used twice in the header, pattern MMddyyHHmm
+ String now=new SimpleDateFormat("MMddyyHHmm").format(System.currentTimeMillis());
+ IAtomContainer rootAtc = rGroupQuery.getRootStructure();
+
+ //Construct header
+ // NOTE(review): the header mixes LSEP and hard-coded "\n" line ends - confirm this is intended.
+ StringBuffer rootBlock=new StringBuffer();
+ String header =
+ "$MDL REV 1 "+now+LSEP+
+ "$MOL\n" +
+ "$HDR\n" +
+ " Rgroup query file (RGFile)\n"+
+ " CDK "+now+"2D\n\n"+
+ "$END HDR\n"+
+ "$CTAB";
+ rootBlock.append(header).append(LSEP);
+
+ //Construct the root structure, the scaffold
+ String rootCTAB = getCTAB(rootAtc);
+ // drop the terminating M END line; it is not re-added in the visible part of this method
+ // NOTE(review): this pattern uses "\n" while the R-group CTABs below use LSEP - verify on platforms where they differ.
+ rootCTAB = rootCTAB.replaceAll("\nM END\n","");
+ rootBlock.append(rootCTAB).append(LSEP);
+
+ //Write the root's LOG lines
+ // one LOG line per R-group definition: fixed 1, R-group number, required R-group number, RestH flag, occurrence
+ for(Integer rgrpNum : rGroupQuery.getRGroupDefinitions().keySet()) {
+ RGroupList rgList = rGroupQuery.getRGroupDefinitions().get(rgrpNum);
+ int restH = rgList.isRestH()?1:0;
+ String logLine =
+ "M LOG"+
+ MDLWriter.formatMDLInt(1, 3)+
+ MDLWriter.formatMDLInt(rgrpNum, 4)+
+ MDLWriter.formatMDLInt(rgList.getRequiredRGroupNumber(), 4)+
+ MDLWriter.formatMDLInt(restH, 4)+
+ " "+rgList.getOccurrence()
+ ;
+ rootBlock.append(logLine).append(LSEP);
+ }
+
+ //AAL lines are optional, they are needed for R-atoms with multiple bonds to the root
+ //for which the order of the attachment points can not be implicitly derived
+ //from the order in the atom block. See CT spec for more on that.
+ for (IAtom rgroupAtom : rGroupQuery.getRootAttachmentPoints().keySet()) {
+ Map rApo= rGroupQuery.getRootAttachmentPoints().get(rgroupAtom);
+ // only R-atoms with more than one attachment bond are examined
+ if (rApo.size()>1) {
+ int prevPos=-1;
+ int apoIdx=1;
+ boolean implicitlyOrdered=true;
+ while (rApo.get(apoIdx)!=null && implicitlyOrdered) {
+ IAtom partner=rApo.get(apoIdx).getConnectedAtom(rgroupAtom);
+ // NOTE(review): the next line looks truncated in this copy of the patch - the rest of the
+ // atom-index loop, the AAL output, the declaration of rgpBlock and the loop over rgrpNum
+ // are not visible here. Restore from the upstream file before relying on this method.
+ for(int atIdx=0; atIdx rgrpList = rGroupQuery.getRGroupDefinitions().get(rgrpNum).getRGroups();
+ if(rgrpList!=null && rgrpList.size()!=0) {
+ rgpBlock.append("$RGP").append(LSEP);;
+ rgpBlock.append(MDLWriter.formatMDLInt(rgrpNum, 4)).append(LSEP);
+
+ for (RGroup rgroup : rgrpList) {
+ //CTAB block
+ rgpBlock.append("$CTAB").append(LSEP);
+ String ctab=getCTAB(rgroup.getGroup());
+ // M END is removed here and re-appended after the optional APO line below
+ ctab = ctab.replaceAll(LSEP+"M END"+LSEP,"");
+ rgpBlock.append(ctab).append(LSEP);
+
+ //The APO line
+ IAtom firstAttachmentPoint= rgroup.getFirstAttachmentPoint();
+ IAtom secondAttachmentPoint=rgroup.getSecondAttachmentPoint();
+ int apoCount=0;
+ // no APO line is produced when the first attachment point is null
+ if (firstAttachmentPoint!=null) {
+ StringBuffer apoLine=new StringBuffer();
+ for (int atIdx = 0; atIdx < rgroup.getGroup().getAtomCount(); atIdx++) {
+ if (rgroup.getGroup().getAtom(atIdx).equals(firstAttachmentPoint)) {
+ // atom positions are written 1-based
+ apoLine.append(MDLWriter.formatMDLInt((atIdx+1), 3));
+ apoCount++;
+ // value 3 when the same atom is both first and second attachment point, otherwise 1
+ if (secondAttachmentPoint!=null &&
+ secondAttachmentPoint.equals(firstAttachmentPoint)) {
+ apoLine.append(MDLWriter.formatMDLInt(3, 3));
+ }
+ else {
+ apoLine.append(MDLWriter.formatMDLInt(1, 3));
+ }
+ }
+ }
+ // a distinct second attachment point gets its own entry with value 2
+ if (secondAttachmentPoint!=null && !secondAttachmentPoint.equals(firstAttachmentPoint)) {
+ for (int atIdx = 0; atIdx < rgroup.getGroup().getAtomCount(); atIdx++) {
+ if (rgroup.getGroup().getAtom(atIdx).equals(secondAttachmentPoint)) {
+ apoCount++;
+ apoLine.append(MDLWriter.formatMDLInt((atIdx+1), 3));
+ apoLine.append(MDLWriter.formatMDLInt(2, 3));
+ }
+ }
+ }
+ // the entry count is only known now, so the line prefix is inserted last
+ if (apoCount>0) {
+ apoLine.insert(0, "M APO"+MDLWriter.formatMDLInt(apoCount, 3));
+ rgpBlock.append(apoLine).append(LSEP);
+ }
+ }
+
+ rgpBlock.append("M END").append(LSEP);
+ rgpBlock.append("$END CTAB").append(LSEP);
+ }
+ rgpBlock.append("$END RGP").append(LSEP);
+ }
+ }
+ rgpBlock.append("$END MOL").append(LSEP);
+
+ writer.write(rootBlock.toString());
+ writer.write(rgpBlock.toString());
+ writer.flush();
+
+ } catch (IOException e) {
+ // NOTE(review): the stack trace goes to stderr and the cause is not chained to the CDKException.
+ e.printStackTrace();
+ throw new CDKException("Unexpected excpetion when writing RGFile.\n"+e.getMessage());
+ }
+
+ }
+}
diff --git a/src/main/org/openscience/cdk/io/formats/RGroupQueryFormat.java b/src/main/org/openscience/cdk/io/formats/RGroupQueryFormat.java
new file mode 100644
index 00000000000..ced87924b49
--- /dev/null
+++ b/src/main/org/openscience/cdk/io/formats/RGroupQueryFormat.java
@@ -0,0 +1,102 @@
+/*
+ * Copyright (C) 2010 Mark Rijnbeek
+ *
+ * Contact: cdk-devel@lists.sourceforge.net
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ * All we ask is that proper credit is given for our work, which includes
+ * - but is not limited to - adding the above copyright notice to the beginning
+ * of your source code files, and to any copyright notice that you may
+ * distribute with programs based on this work.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+package org.openscience.cdk.io.formats;
+
+import org.openscience.cdk.annotations.TestMethod;
+import org.openscience.cdk.tools.DataFeatures;
+
+/**
+ * Format for Symyx RGfiles (Rgroup query files).
+ * The class is a singleton; obtain it through {@link #getInstance()}.
+ * @cdk.module ioformats
+ * @cdk.githash
+ * @cdk.set io-formats
+ */
+public class RGroupQueryFormat implements IChemFormatMatcher {
+
+    /** The single instance, created eagerly so that publication is thread-safe. */
+    private static final IResourceFormat myself = new RGroupQueryFormat();
+
+    private RGroupQueryFormat() {
+    }
+
+    /**
+     * Returns the single instance of this format.
+     * @return the shared instance
+     */
+    @TestMethod("testResourceFormatSet")
+    public static IResourceFormat getInstance() {
+        return myself;
+    }
+
+    /**
+     * Returns the human readable name of the format.
+     */
+    @TestMethod("testGetFormatName")
+    public String getFormatName() {
+        return "Symyx Rgroup query files";
+    }
+
+    /**
+     * Returns the MIME type; this format returns {@code null}.
+     */
+    @TestMethod("testGetMIMEType")
+    public String getMIMEType() {
+        return null;
+    }
+
+    /**
+     * Returns the first entry of {@link #getNameExtensions()}.
+     */
+    @TestMethod("testGetPreferredNameExtension")
+    public String getPreferredNameExtension() {
+        return getNameExtensions()[0];
+    }
+
+    /**
+     * Returns the file name extensions of this format, as a fresh array.
+     */
+    @TestMethod("testGetNameExtensions")
+    public String[] getNameExtensions() {
+        return new String[] { "mol", "rgp" };
+    }
+
+    /**
+     * Returns the fully qualified name of the reader class.
+     */
+    @TestMethod("testGetReaderClassName")
+    public String getReaderClassName() {
+        return "org.openscience.cdk.io.RGroupQueryReader";
+    }
+
+    /**
+     * Returns the fully qualified name of the writer class.
+     */
+    @TestMethod("testGetWriterClassName")
+    public String getWriterClassName() {
+        return "org.openscience.cdk.io.RGroupQueryWriter";
+    }
+
+    /**
+     * Tells whether a line identifies the content as an RGfile: true when
+     * the line contains the text "$RGP".
+     * @param lineNumber number of the line (not used by this matcher)
+     * @param line content of the line; {@code null} never matches
+     * @return true when the line contains "$RGP"
+     */
+    public boolean matches(int lineNumber, String line) {
+        return line != null && line.contains("$RGP");
+    }
+
+    /**
+     * Returns false: the format is not XML based.
+     */
+    @TestMethod("testIsXMLBased")
+    public boolean isXMLBased() {
+        return false;
+    }
+
+    /**
+     * Returns the required data features combined with
+     * {@link DataFeatures#HAS_2D_COORDINATES}.
+     */
+    @TestMethod("testGetSupportedDataFeatures")
+    public int getSupportedDataFeatures() {
+        return getRequiredDataFeatures() | DataFeatures.HAS_2D_COORDINATES ;
+    }
+
+    /**
+     * Returns {@link DataFeatures#HAS_ATOM_ELEMENT_SYMBOL}.
+     */
+    @TestMethod("testGetRequiredDataFeatures")
+    public int getRequiredDataFeatures() {
+        return DataFeatures.HAS_ATOM_ELEMENT_SYMBOL;
+    }
+}
diff --git a/src/main/org/openscience/cdk/isomorphism/matchers/IRGroupQuery.java b/src/main/org/openscience/cdk/isomorphism/matchers/IRGroupQuery.java
new file mode 100644
index 00000000000..47577a6bc44
--- /dev/null
+++ b/src/main/org/openscience/cdk/isomorphism/matchers/IRGroupQuery.java
@@ -0,0 +1,135 @@
+/*
+ * Copyright (C) 2010 Mark Rijnbeek
+ *
+ * Contact: cdk-devel@lists.sourceforge.net
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ * All we ask is that proper credit is given for our work, which includes
+ * - but is not limited to - adding the above copyright notice to the beginning
+ * of your source code files, and to any copyright notice that you may
+ * distribute with programs based on this work.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+package org.openscience.cdk.isomorphism.matchers;
+
+import java.util.List;
+import java.util.Map;
+
+
+import org.openscience.cdk.PseudoAtom;
+import org.openscience.cdk.exception.CDKException;
+import org.openscience.cdk.interfaces.IAtom;
+import org.openscience.cdk.interfaces.IAtomContainer;
+import org.openscience.cdk.interfaces.IBond;
+import org.openscience.cdk.interfaces.IChemObject;
+
+/**
+ * Interface definition for Rgroup query classes. These must provide a root
+ * structure, root attachment points and Rgroup definitions.
+ *
+ * @cdk.module isomorphism
+ * @cdk.githash
+ * @cdk.keyword Rgroup
+ * @cdk.keyword R group
+ * @cdk.keyword R-group
+ * @author Mark Rijnbeek
+ */
+public interface IRGroupQuery extends IChemObject {
+
+    /**
+     * Setter for the root structure of this R-Group.
+     * @param rootStructure the root structure (or scaffold) container
+     * @see #getRootStructure
+     */
+    public void setRootStructure(IAtomContainer rootStructure);
+
+    /**
+     * Getter for the root structure of this R-Group.
+     * @return the root structure (or scaffold) container
+     * @see #setRootStructure
+     */
+    public IAtomContainer getRootStructure();
+
+    /**
+     * Setter for root attachment points = bonds that connect R pseudo-atoms to the scaffold.
+     * @param rootAttachmentPoints Map with per R-group pseudo atom another map with an
+     *        Integer and an IBond, the integer indicating 1st or 2nd attachment.
+     * @see #getRootAttachmentPoints()
+     */
+    public void setRootAttachmentPoints(Map<IAtom, Map<Integer, IBond>> rootAttachmentPoints);
+
+    /**
+     * Getter for root attachment points = bonds that connect R pseudo-atoms to the scaffold.
+     * @return Map with per R-group pseudo atom another map with an Integer and an IBond,
+     *         the integer indicating 1st or 2nd attachment.
+     * @see #setRootAttachmentPoints(Map)
+     */
+    public Map<IAtom, Map<Integer, IBond>> getRootAttachmentPoints();
+
+    /**
+     * Setter for the R-group definitions (substituents).
+     * @param rGroupDefinitions map with an Integer and an RGroupList (substituent list),
+     *        the Integer being the R-Group number (1..32).
+     * @see #getRGroupDefinitions
+     */
+    public void setRGroupDefinitions(Map<Integer, RGroupList> rGroupDefinitions);
+
+    /**
+     * Getter for the R-group definitions (substituents).
+     * @return rGroupDefinitions Map with an Integer and an RGroupList (substituent list),
+     *         the Integer being the R-Group number (1..32).
+     * @see #setRGroupDefinitions
+     */
+    public Map<Integer, RGroupList> getRGroupDefinitions();
+
+    /**
+     * Return the total number of atom containers (count the root plus all substituents).
+     * @return count.
+     */
+    public int getAtomContainerCount();
+
+    /**
+     * Return all the substituent atom containers, in other words the atom containers
+     * defined in this RGroupQuery except for the root structure.
+     * @return list with all substituents
+     */
+    public List<IAtomContainer> getSubstituents();
+
+    /**
+     * Checks validity of the RGroupQuery.
+     * Each distinct R# in the root must have a
+     * corresponding {@link RGroupList} definition.
+     * In file terms: $RGP blocks must be defined for each R-group number.
+     * @return true when valid
+     */
+    public boolean areSubstituentsDefined();
+
+    /**
+     * Checks validity of RGroupQuery.
+     * Each {@link RGroupList} definition must have one or more corresponding
+     * R# atoms in the root block.
+     * @return true when valid
+     */
+    public boolean areRootAtomsDefined();
+
+    /**
+     * Produces all combinations of the root structure (scaffold) with the R-groups
+     * substituted in valid ways, using each R-group's definitions and conditions.
+     * @return all valid combinations of the root structure (scaffold) with the
+     *         R-groups substituted.
+     * @throws Exception
+     */
+    public List<IAtomContainer> getAllConfigurations() throws Exception;
+
+}
diff --git a/src/main/org/openscience/cdk/isomorphism/matchers/RGroup.java b/src/main/org/openscience/cdk/isomorphism/matchers/RGroup.java
new file mode 100644
index 00000000000..4dfa8f06d4e
--- /dev/null
+++ b/src/main/org/openscience/cdk/isomorphism/matchers/RGroup.java
@@ -0,0 +1,88 @@
+/*
+ * Copyright (C) 2010 Mark Rijnbeek
+ *
+ * Contact: cdk-devel@lists.sourceforge.net
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ * All we ask is that proper credit is given for our work, which includes
+ * - but is not limited to - adding the above copyright notice to the beginning
+ * of your source code files, and to any copyright notice that you may
+ * distribute with programs based on this work.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+package org.openscience.cdk.isomorphism.matchers;
+
+import org.openscience.cdk.interfaces.IAtom;
+import org.openscience.cdk.interfaces.IAtomContainer;
+
+/**
+ * Represents a single substitute structure in an {@link RGroupList}.
+ * The order of attachment points is provided (first and second only, conform
+ * RGFile spec). This order is relevant when the structure connects to the root
+ * with more than one bond.
+ *
+ * See also {@link RGroupList} and {@link RGroupQuery}.
+ *
+ * @cdk.module isomorphism
+ * @cdk.githash
+ * @cdk.keyword Rgroup
+ * @cdk.keyword R group
+ * @cdk.keyword R-group
+ * @author Mark Rijnbeek
+ */
+public class RGroup {
+
+ /**
+ * Standard label/title to be used for the root atom container.
+ */
+ public final static String ROOT_LABEL="Root structure";
+
+ /**
+ * Makes a label/title to be used for a substituent; the number is not range-checked here.
+ * @param rgroupNum R-Group number (1..32)
+ * @return label for substituent, like "(R3)"
+ */
+ public static String makeLabel(int rgroupNum ) {
+ return "(R"+rgroupNum+")";
+ }
+
+ IAtom firstAttachmentPoint; // substituent atom for the first attachment (order 1)
+ IAtom secondAttachmentPoint; // substituent atom for the second attachment (order 2), if any
+ IAtomContainer group; // the substitute structure itself
+ /** Sets the substituent atom used for the first attachment. */
+ public void setFirstAttachmentPoint(IAtom firstAttachmentPoint) {
+ this.firstAttachmentPoint = firstAttachmentPoint;
+ }
+ /** Returns the substituent atom used for the first attachment, as last set (may be null). */
+ public IAtom getFirstAttachmentPoint() {
+ return firstAttachmentPoint;
+ }
+ /** Sets the substituent atom used for the second attachment. */
+ public void setSecondAttachmentPoint(IAtom secondAttachmentPoint) {
+ this.secondAttachmentPoint = secondAttachmentPoint;
+ }
+ /** Returns the substituent atom used for the second attachment, as last set (may be null). */
+ public IAtom getSecondAttachmentPoint() {
+ return secondAttachmentPoint;
+ }
+ /** Sets the atom container holding the substitute structure. */
+ public void setGroup(IAtomContainer group) {
+ this.group = group;
+ }
+ /** Returns the atom container holding the substitute structure, as last set (may be null). */
+ public IAtomContainer getGroup() {
+ return group;
+ }
+}
diff --git a/src/main/org/openscience/cdk/isomorphism/matchers/RGroupList.java b/src/main/org/openscience/cdk/isomorphism/matchers/RGroupList.java
new file mode 100644
index 00000000000..1c6b8cf89bf
--- /dev/null
+++ b/src/main/org/openscience/cdk/isomorphism/matchers/RGroupList.java
@@ -0,0 +1,316 @@
+/*
+ * Copyright (C) 2010 Mark Rijnbeek
+ *
+ * Contact: cdk-devel@lists.sourceforge.net
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ * All we ask is that proper credit is given for our work, which includes
+ * - but is not limited to - adding the above copyright notice to the beginning
+ * of your source code files, and to any copyright notice that you may
+ * distribute with programs based on this work.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+package org.openscience.cdk.isomorphism.matchers;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.StringTokenizer;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.openscience.cdk.exception.CDKException;
+
+
+/**
+ * Represents a list of Rgroup substitutes to be associated with some
+ * {@link RGroupQuery}.
+ *
+ * @cdk.module isomorphism
+ * @cdk.githash
+ * @cdk.keyword Rgroup
+ * @cdk.keyword R group
+ * @cdk.keyword R-group
+ * @author Mark Rijnbeek
+ */
+public class RGroupList {
+ /**
+ * Default value for occurrence field.
+ */
+ public final static String DEFAULT_OCCURRENCE=">0";
+
+ /**
+ * Unique number to identify the Rgroup.
+ */
+ private int rGroupNumber;
+
+ /**
+ * Indicates that sites labeled with this Rgroup may only be
+ * substituted with a member of the Rgroup or with hydrogen.
+ */
+ private boolean restH;
+
+ /**
+ * Occurrence required:
+ *
+ * - n : exactly n ;
+ * - n - m : n through m ;
+ * - > n : greater than n ;
+ * - < n : fewer than n ;
+ * - default (blank) is > 0 ;
+ *
+ * Any non-contradictory combination of the preceding values is also
+ * allowed; for example "1, 3-7, 9, >11".
+ */
+ private String occurrence;
+
+ /**
+ * List of substitute structures.
+ */
+ private List rGroups;
+
+ /**
+ * The rGroup (say B) that is required when this one (say A) exists.
+ * This captures the "LOG" information 'IF A (this) THEN B'.
+ */
+ private int requiredRGroupNumber;
+
+
+ /**
+ * Creates a list for the given R-group number with default attributes: restH false, occurrence ">0", no required R-group (0).
+ */
+ public RGroupList(int rGroupNumber) {
+ setRGroupNumber(rGroupNumber); // rejects numbers outside 1..32 with a RuntimeException
+ this.restH = false;
+ this.occurrence = DEFAULT_OCCURRENCE;
+ this.requiredRGroupNumber=0;
+ }
+
+ /**
+ * Constructor with attributes given; each value is passed through the matching setter.
+ *
+ * @param rGroupNumber R-Group number (1..32, anything else raises a RuntimeException)
+ * @param restH true if sites of this R-group may only carry a member of the group or hydrogen
+ * @param occurrence occurrence condition, for example "1, 3-7, 9, >11"
+ * @param requiredRGroupNumber number of other R-Group required
+ * @throws CDKException if the occurrence value is rejected by {@link #setOccurrence(String)}
+ */
+ public RGroupList(int rGroupNumber, boolean restH, String occurrence, int requiredRGroupNumber) throws CDKException {
+ setRGroupNumber(rGroupNumber);
+ setRestH(restH);
+ setOccurrence(occurrence);
+ setRequiredRGroupNumber(requiredRGroupNumber);
+ }
+
+    /**
+     * Setter for rGroupNumber, checks for valid range.
+     * Spec: "value from 1 to 32 *, labels position of Rgroup on root."
+     * @param rGroupNumber R-Group number
+     * @throws IllegalArgumentException if rGroupNumber is outside 1..32
+     */
+    public void setRGroupNumber(int rGroupNumber) {
+        if (rGroupNumber < 1 || rGroupNumber > 32) {
+            throw new IllegalArgumentException("Rgroup number must be between 1 and 32.");
+        }
+        this.rGroupNumber = rGroupNumber;
+    }
+
+ public int getRGroupNumber() {
+ return rGroupNumber;
+ }
+
+ public void setRestH(boolean restH) {
+ this.restH = restH;
+ }
+
+ public boolean isRestH() {
+ return restH;
+ }
+
+ public void setRequiredRGroupNumber(int rGroupNumberImplicated) {
+ this.requiredRGroupNumber = rGroupNumberImplicated;
+ }
+
+ public int getRequiredRGroupNumber() {
+ return requiredRGroupNumber;
+ }
+
+ public void setRGroups(List rGroups) {
+ this.rGroups = rGroups;
+ }
+
+ public List getRGroups() {
+ return rGroups;
+ }
+
+ /**
+ * Returns the occurrence value.
+ * @return occurrence
+ */
+ public String getOccurrence() {
+ return occurrence;
+ }
+
+    /**
+     * Sets the occurrence condition, validated against the (Symyx) specification; null or blank reverts to the default ">0".
+     * @param occurrence occurrence value, for example "1, 3-7, 9, >11"; spaces are stripped before it is stored
+     * @throws CDKException if the value is not valid occurrence syntax
+     */
+    public void setOccurrence(String occurrence) throws CDKException {
+        if (occurrence == null || occurrence.trim().equals("")) {
+            this.occurrence = DEFAULT_OCCURRENCE; // the field, not the parameter that shadows it
+        } else {
+            occurrence = occurrence.trim().replaceAll(" ", "");
+            if (isValidOccurrenceSyntax(occurrence)) {
+                this.occurrence = occurrence;
+            } else
+                throw new CDKException("Invalid occurence line: " + occurrence);
+        }
+    }
+
+ /**
+ * Validates the occurrence value.
+ *
+ * - n : exactly n ;
+ * - n - m : n through m ;
+ * - > n : greater than n ;
+ * - < n : fewer than n ;
+ * - default (blank) is > 0 ;
+ *
+ * Any combination of the preceding values is also
+ * allowed; for example "1, 3-7, 9, >11".
+ * @param occ String to validate.
+ * @return true if valid String provided.
+ */
+ public static boolean isValidOccurrenceSyntax(String occ) {
+ StringTokenizer st = new StringTokenizer(occ, ",");
+ while (st.hasMoreTokens()) {
+ String cond = st.nextToken().trim().replaceAll(" ", "");
+ do {
+ //Number: "n"
+ if (match("^\\d+$", cond)) {
+ if (new Integer(cond)<0) // not allowed
+ return false;
+ break;
+ }
+ //Range: "n-m"
+ if (match("^\\d+-\\d+$", cond)) {
+ int from = new Integer(cond.substring(0,cond.indexOf("-")));
+ int to = new Integer(cond.substring(cond.indexOf("-")+1,cond.length()));
+ if (from<0 || to <0 || ton"
+ if (match("^>\\d+$", cond)) {
+ break;
+ }
+
+ return false;
+ } while (1==0);
+ }
+
+ return true;
+ }
+
+    /**
+     * Tells whether a regular expression is found anywhere in the given input.
+     * Anchoring (^ and $) is left to the caller, because {@link Matcher#find()} is used.
+     *
+     * @param regExp regular expression String
+     * @param userInput user's input
+     * @return true if the expression is found in the input, otherwise false
+     */
+    private static boolean match(String regExp, String userInput) {
+        final Pattern compiled = Pattern.compile(regExp);
+        final Matcher searcher = compiled.matcher(userInput);
+        final boolean found = searcher.find();
+        return found;
+    }
+
+
+ /**
+ * Matches the 'occurrence' condition with a provided maximum number of
+ * RGroup attachments. Returns the valid occurrences (numeric) for these
+ * two combined. If none found, returns empty list.
+ * Example: if R1 occurs 3 times attached to some root structure, then
+ * stating ">5" as an occurrence for that RGroupList does not make
+ * sense: the example R1 can occur 0..3 times. Empty would be returned.
+ * If the occurrence would be >2, then 3 would be returned. Etcetera.
+ *
+ * @param maxAttachments number of attachments
+ * @return valid values by combining a max for R# with the occurrence cond.
+ */
+ public List matchOccurence(int maxAttachments) {
+
+ List validValues = new ArrayList();
+
+ for (int val = 0; val <= maxAttachments; val++) {
+ boolean addVal=false;
+
+ StringTokenizer st = new StringTokenizer(occurrence, ",");
+ while (st.hasMoreTokens() && !addVal) {
+ String cond = st.nextToken().trim().replaceAll(" ", "");
+ if (match("^\\d+$", cond)) { // n
+ if(new Integer(cond)==val)
+ addVal=true;
+ }
+ if (match("^\\d+-\\d+$", cond)) { // n-m
+ int from = new Integer(cond.substring(0,cond.indexOf("-")));
+ int to = new Integer(cond.substring(cond.indexOf("-")+1,cond.length()));
+ if ( val>=from && val <=to) {
+ addVal=true;
+ }
+ }
+ if (match("^>\\d+$", cond)) { // ")+1,cond.length()));
+ if(val>n){
+ addVal=true;
+ }
+ }
+ if (match("^<\\d+$", cond)) { // >n
+ int n = new Integer(cond.substring(cond.indexOf("<")+1,cond.length()));
+ if(val
+ *
+ * Contact: cdk-devel@lists.sourceforge.net
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ * All we ask is that proper credit is given for our work, which includes
+ * - but is not limited to - adding the above copyright notice to the beginning
+ * of your source code files, and to any copyright notice that you may
+ * distribute with programs based on this work.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+package org.openscience.cdk.isomorphism.matchers;
+
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import javax.vecmath.Point2d;
+
+import org.openscience.cdk.CDKConstants;
+import org.openscience.cdk.ChemObject;
+import org.openscience.cdk.PseudoAtom;
+import org.openscience.cdk.exception.CDKException;
+import org.openscience.cdk.interfaces.IAtom;
+import org.openscience.cdk.interfaces.IAtomContainer;
+import org.openscience.cdk.interfaces.IBond;
+import org.openscience.cdk.interfaces.IChemObject;
+import org.openscience.cdk.tools.ILoggingTool;
+import org.openscience.cdk.tools.LoggingToolFactory;
+
+
+/**
+ * Represents information contained in a Symyx RGfile (R-group query file).
+ * It contains a root structure (the scaffold if you like), a map with
+ * R-group definitions (each of which can contain multiple substitutes) and
+ * a map with attachment points. The attachment points define a connection
+ * order for the substitutes, which is relevant when an Rgroup is connected
+ * to the scaffold with more than one bond.
+ *
+ * This class can also be used to produce all the valid configurations
+ * for the combination of its root,definitions and conditions.
+ *
+ * This Javadoc does not contain a code sample how to create a new RGroupQuery
+ * from scratch, because a sensible RGroupQuery has quite a few attributes to be set
+ * including a root plus a bunch of substituents, which are all atom containers.
+ * So that would be a lot of sample code here.
+ * The best way to get a feel for the way the RGroup objects are populated is to
+ * run the {@link org.openscience.cdk.io.RGroupQueryReaderTest} and look at the sample
+ * input RGroup query files contained in the CDK and how they translate into
+ * RGroupXX objects. The JChempaint application can visualize the input files for you.
+ *
+ * @cdk.module isomorphism
+ * @cdk.githash
+ * @cdk.keyword Rgroup
+ * @cdk.keyword R group
+ * @cdk.keyword R-group
+ * @author Mark Rijnbeek
+ */
+public class RGroupQuery extends ChemObject implements IChemObject, Serializable, IRGroupQuery {
+
+ private static final long serialVersionUID = -1656116487614720605L;
+
+ private static ILoggingTool logger = LoggingToolFactory.createLoggingTool(RGroupQuery.class);
+
+ /**
+ * The root structure (or scaffold) to which R-groups are attached.
+ */
+ private IAtomContainer rootStructure;
+
+ /**
+ * Rgroup definitions, each a list of possible substitutes for the
+ * given R number.
+ */
+ private Map rGroupDefinitions;
+
+ /**
+ * For each Rgroup Atom there may be a map containing (number,bond),
+ * being the attachment order (1,2) and the bond to attach to.
+ */
+ private Map> rootAttachmentPoints;
+
+
+ /**
+ * Returns all R# type atoms (pseudo atoms) found in the root structure
+ * for a certain provided R-group number; a bare "R" label is never included.
+ * @param rgroupNumber R# number, 1..32, or null to select every R# atom
+ * @return list of (pseudo) atoms with the provided rgroupNumber as label, or null when no root structure is set
+ */
+ public List getRgroupQueryAtoms(Integer rgroupNumber) {
+
+ List rGroupQueryAtoms = null;
+
+ if (rootStructure != null) {
+ rGroupQueryAtoms = new ArrayList();
+ // NOTE(review): with a number given, a label such as "Rx" reaches new Integer(...) and would throw NumberFormatException.
+ for (int i = 0; i < rootStructure.getAtomCount(); i++) {
+ IAtom atom = rootStructure.getAtom(i);
+ if (atom instanceof PseudoAtom) {
+ PseudoAtom rGroup = (PseudoAtom)atom;
+ if (!rGroup.getLabel().equals("R") && // just "R" is not a proper query atom
+ rGroup.getLabel().startsWith("R") &&
+ (rgroupNumber == null || new Integer(rGroup.getLabel().substring(1)).equals(rgroupNumber)))
+ rGroupQueryAtoms.add(atom);
+ }
+ }
+ }
+ return rGroupQueryAtoms;
+ }
+
+
+ /**
+ * Returns all R# type atoms (pseudo atoms) found in the root structure, whatever their number.
+ * @return list of (pseudo) R# atoms, or null when no root structure is set
+ */
+ public List getAllRgroupQueryAtoms() {
+ return getRgroupQueryAtoms(null);
+ }
+
+
+    private static Pattern validLabelPattern = Pattern.compile("R0*(\\d{1,2})");
+
+    /**
+     * Validates a Pseudo atom's label to be valid RGroup query label (R1..R32).
+     * Leading zeros are accepted ("R01"); null, longer numbers and trailing line breaks give false.
+     * @param Rxx R-group label like R1 or R10, may be null
+     * @return true if R1..R32, otherwise false
+     */
+    public static boolean isValidRgroupQueryLabel(String Rxx) {
+        Matcher matcher = (Rxx == null) ? null : validLabelPattern.matcher(Rxx);
+        // matches() instead of find(): the whole label must fit, so "R1\n" or "R100" never reach parseInt.
+        if (matcher == null || !matcher.matches()) {
+            return false;
+        }
+        int groupNumber = Integer.parseInt(matcher.group(1));
+        return groupNumber >= 1 && groupNumber <= 32;
+    }
+
+ public boolean areSubstituentsDefined() {
+ List allRgroupAtoms = getAllRgroupQueryAtoms();
+ if (allRgroupAtoms == null)
+ return false; // no root structure set
+ // Every R# atom with a valid label (R1..R32) needs a non-empty substituent list; other labels are ignored.
+ for (IAtom rgp : allRgroupAtoms) {
+ if (RGroupQuery.isValidRgroupQueryLabel(((PseudoAtom)rgp).getLabel())) {
+ int groupNum = new Integer(((PseudoAtom)rgp).getLabel().substring(1));
+ if (rGroupDefinitions == null || rGroupDefinitions.get(groupNum) == null ||
+ rGroupDefinitions.get(groupNum).getRGroups() == null ||
+ rGroupDefinitions.get(groupNum).getRGroups().size() == 0) {
+ return false; // definition map, list or substituents missing for this R#
+ }
+ }
+ }
+ return true;
+ }
+
+ public boolean areRootAtomsDefined() {
+ for (Integer rgpNum : rGroupDefinitions.keySet()) { // NOTE(review): definitions and root are dereferenced unguarded here
+ boolean represented=false;
+ rootLoop:
+ for (IAtom rootAtom : this.getRootStructure().atoms()) {
+ if (rootAtom instanceof PseudoAtom && rootAtom.getSymbol().startsWith("R")) {
+ PseudoAtom pseudo = (PseudoAtom) rootAtom;
+ if(pseudo.getLabel().length()>1) { // a one-character label carries no number to parse
+ int rootAtomRgrpNumber = new Integer(pseudo.getLabel().substring(1));
+ if (rootAtomRgrpNumber==rgpNum) {
+ represented=true;
+ break rootLoop; // one matching root atom is enough for this definition
+ }
+ }
+ }
+ }
+ if(!represented) {
+ return false; // this definition has no R# atom in the root
+ }
+ }
+ return true;
+ }
+
+
+ public List getAllConfigurations() throws CDKException {
+ // Refuse to start unless areSubstituentsDefined() holds for the root's R# atoms.
+ if (!areSubstituentsDefined()) {
+ throw new CDKException("Can not configure molecules: missing R# group definitions.");
+ }
+
+ //result = a list of concrete atom containers that are valid interpretations of the RGroup query
+ List result = new ArrayList();
+
+
+ //rGroupNumbers = list holding each R# number for this RGroup query
+ List rGroupNumbers = new ArrayList();
+
+ //distributions = a list of valid distributions, that is a one/zero representation
+ // indicating which atom in an atom series belonging to a particular
+ // R# group is present (1) or absent (0).
+ List distributions = new ArrayList();
+
+ //substitutes = per R# group, the substitute picked for each of its root atoms (null where none)
+ List> substitutes = new ArrayList>();
+
+ //Valid occurrences for each R# group
+ List> occurrences = new ArrayList>();
+ List occurIndexes = new ArrayList(); //per R# group, index of the occurrence currently tried
+
+ //Build up each R# group data before recursively finding configurations.
+ Iterator rGroupNumItr = rGroupDefinitions.keySet().iterator();
+ if (rGroupNumItr.hasNext()) {
+ while (rGroupNumItr.hasNext()) {
+ int r = rGroupNumItr.next();
+ rGroupNumbers.add(r);
+ List validOcc = rGroupDefinitions.get(r).matchOccurence(getRgroupQueryAtoms(r).size());
+ if (validOcc.size() == 0) {
+ throw new CDKException("Occurrence '" + rGroupDefinitions.get(r).getOccurrence() +
+ "' defined for Rgroup " + r +
+ " results in no subsititute options for this R-group.");
+ }
+ occurrences.add(validOcc);
+ occurIndexes.add(0);
+ }
+ //Init distributions and substitutes: empty and with the right list size
+ for (int i = 0; i < rGroupNumbers.size(); i++) {
+ distributions.add(null);
+ substitutes.add(null);
+ }
+
+ //Start finding valid configurations using recursion, output will be put in 'result'.
+ findConfigurationsRecursively(rGroupNumbers, occurrences, occurIndexes, distributions, substitutes, 0,
+ result);
+
+ }
+ return result; // stays empty when there are no R-group definitions at all
+ }
+
+
+    /**
+     * Recursive function to produce valid configurations
+     * for {@link #getAllConfigurations()}. Each level picks, for one R# group, an
+     * occurrence count, a distribution over its root atoms and a substitute per
+     * chosen atom; once every group has a choice, one configuration is assembled.
+     *
+     * @param rGroupNumbers the R# numbers of this query, one per recursion level
+     * @param occurrences valid occurrence counts for each R# group
+     * @param occurIndexes index of the occurrence currently picked per R# group
+     * @param distributions 0/1 flags per R# group: which of its root atoms get substituted
+     * @param substitutes per R# group, the substitute chosen per root atom (null = none)
+     * @param level current recursion depth, an index into rGroupNumbers
+     * @param result output list, receives every assembled configuration
+     * @throws CDKException if the root or a substitute can not be cloned
+     */
+    private void findConfigurationsRecursively(List<Integer> rGroupNumbers, List<List<Integer>> occurrences,
+                                               List<Integer> occurIndexes, List<Integer[]> distributions,
+                                               List<List<RGroup>> substitutes, int level,
+                                               List<IAtomContainer> result) throws CDKException {
+        if (level == rGroupNumbers.size()) {
+            if (!checkIfThenConditionsMet(rGroupNumbers, distributions))
+                return;
+
+            // Clone the root to get a scaffold to plug the substitutes into.
+            IAtomContainer root = this.getRootStructure();
+            IAtomContainer rootClone = null;
+            try {
+                rootClone = (IAtomContainer)root.clone();
+            } catch (CloneNotSupportedException e) {
+                //Abort with CDK exception
+                throw new CDKException("clone() failed; could not perform R-group substitution.");
+            }
+
+            for (int rgpIdx = 0; rgpIdx < rGroupNumbers.size(); rgpIdx++) {
+                int rNum = rGroupNumbers.get(rgpIdx);
+                int pos = 0;
+                List<RGroup> mapped = substitutes.get(rgpIdx);
+                for (RGroup substitute : mapped) {
+                    IAtom rAtom = this.getRgroupQueryAtoms(rNum).get(pos);
+                    if (substitute !=null) {
+                        IAtomContainer rgrpClone = null;
+                        try {
+                            rgrpClone = (IAtomContainer)(substitute.getGroup().clone());
+                        } catch (CloneNotSupportedException e) {
+                            throw new CDKException("clone() failed; could not perform R-group substitution.");
+                        }
+                        //root cloned, substitute cloned. These now need to be attached to each other..
+                        rootClone.add(rgrpClone);
+                        Map<Integer, IBond> rAttachmentPoints = this.getRootAttachmentPoints().get(rAtom);
+                        if (rAttachmentPoints != null) {
+                            // Loop over attachment points of the R# atom
+                            for (int apo = 0; apo < rAttachmentPoints.size(); apo++) {
+                                IBond bond = rAttachmentPoints.get(apo + 1);
+                                //Check how R# is attached to bond
+                                int whichAtomInBond = 0;
+                                if (bond.getAtom(1).equals(rAtom))
+                                    whichAtomInBond = 1;
+                                IAtom subsAt = null;
+                                if (apo == 0)
+                                    subsAt = substitute.getFirstAttachmentPoint();
+                                else
+                                    subsAt = substitute.getSecondAttachmentPoint();
+
+                                //Do substitution with the clones
+                                IBond cloneBond = rootClone.getBond(getBondPosition(bond, root));
+                                if (subsAt != null) {
+                                    IAtom subsCloneAtom =
+                                        rgrpClone.getAtom(getAtomPosition(subsAt, substitute.getGroup()));
+                                    cloneBond.setAtom(subsCloneAtom, whichAtomInBond);
+                                }
+                            }
+                        }
+
+                        //Optional: shift substitutes 2D for easier visual checking
+                        if (rAtom.getPoint2d() != null && substitute != null &&
+                            substitute.getFirstAttachmentPoint() != null &&
+                            substitute.getFirstAttachmentPoint().getPoint2d() != null) {
+                            Point2d pointR = rAtom.getPoint2d();
+                            Point2d pointC = substitute.getFirstAttachmentPoint().getPoint2d();
+                            double xDiff = pointC.x - pointR.x;
+                            double yDiff = pointC.y - pointR.y;
+                            for (IAtom subAt : rgrpClone.atoms()) {
+                                if (subAt.getPoint2d() != null) {
+                                    subAt.getPoint2d().x -= xDiff;
+                                    subAt.getPoint2d().y -= yDiff;
+                                }
+                            }
+                        }
+                    } else {
+                        //Distribution flag is 0, this means the R# group will not be substituted.
+                        //Any atom connected to this group should be given the defined RestH value.
+                        IAtom discarded = rootClone.getAtom(getAtomPosition(rAtom, root));
+                        for (IBond r0Bond : rootClone.bonds()) {
+                            if (r0Bond.contains(discarded)) {
+                                for (IAtom atInBond : r0Bond.atoms()) {
+                                    atInBond.setProperty(CDKConstants.REST_H,
+                                        this.getRGroupDefinitions().get(rNum).isRestH());
+                                }
+                            }
+                        }
+                    }
+
+                    pos++;
+                }
+            }
+
+            //Remove R# remnants from the clone, bonds and atoms that may linger.
+            //Each flag is cleared before its scan, so a scan over an empty bond/atom
+            //list ends the loop instead of leaving the flag set forever.
+            boolean confHasRGroupBonds = true;
+            while (confHasRGroupBonds) {
+                confHasRGroupBonds = false;
+                for (IBond cloneBond : rootClone.bonds()) {
+                    boolean removeBond = false;
+                    if (cloneBond.getAtom(0) instanceof PseudoAtom &&
+                        isValidRgroupQueryLabel(((PseudoAtom)cloneBond.getAtom(0)).getLabel()))
+                        removeBond = true;
+                    else if (cloneBond.getAtom(1) instanceof PseudoAtom &&
+                        isValidRgroupQueryLabel(((PseudoAtom)cloneBond.getAtom(1)).getLabel()))
+                        removeBond = true;
+
+                    if (removeBond) {
+                        rootClone.removeBond(cloneBond);
+                        confHasRGroupBonds = true;
+                        break;
+                    }
+                }
+            }
+            boolean confHasRGroupAtoms = true;
+            while (confHasRGroupAtoms) {
+                confHasRGroupAtoms = false;
+                for (IAtom cloneAt : rootClone.atoms()) {
+                    if (cloneAt instanceof PseudoAtom)
+                        if (isValidRgroupQueryLabel(((PseudoAtom)cloneAt).getLabel())) {
+                            rootClone.removeAtom(cloneAt);
+                            confHasRGroupAtoms = true;
+                            break;
+                        }
+                }
+            }
+            //Add to result list
+            result.add(rootClone);
+        } else {
+            for (int idx = 0; idx < occurrences.get(level).size(); idx++) {
+                occurIndexes.set(level, idx);
+                //With an occurrence picked 0..n for this level's R-group, now find
+                //all possible distributions (positional alternatives).
+                int occurrence = occurrences.get(level).get(idx);
+                int positions = this.getRgroupQueryAtoms(rGroupNumbers.get(level)).size();
+                Integer[] candidate = new Integer[positions];
+                for (int j = 0; j < candidate.length; j++) {
+                    candidate[j] = 0;
+                }
+                List<Integer[]> rgrpDistributions = new ArrayList<Integer[]>();
+                findDistributions(occurrence, candidate, rgrpDistributions, 0);
+
+                for (Integer[] distribution : rgrpDistributions) {
+                    distributions.set(level, distribution);
+                    RGroup[] mapping = new RGroup[distribution.length];
+                    List<List<RGroup>> mappedSubstitutes = new ArrayList<List<RGroup>>();
+                    mapSubstitutes(this.getRGroupDefinitions().get(rGroupNumbers.get(level)),0, distribution, mapping, mappedSubstitutes);
+
+                    for (List<RGroup> mappings : mappedSubstitutes) {
+                        substitutes.set(level,mappings);
+                        findConfigurationsRecursively(rGroupNumbers, occurrences, occurIndexes, distributions,
+                                                      substitutes, level + 1, result);
+                    }
+                }
+            }
+        }
+    }
+
+ /**
+ * Finds valid distributions for a given R# group and one occurrence
+ * count allowed by the condition taken from the LOG line.
+ * For example: if we have three Rn group atoms, and ">1" for
+ * the occurrence, then there are four possible ways to make a
+ * distribution: 3 ways to put in two atoms (occur=2), and one way
+ * to put in all 3 atoms (occur=3). Each call handles a single count.
+ * @param occur number of R# atoms to be substituted, i.e. the required sum of the flags
+ * @param candidate working array of 0/1 flags, one per R# atom; modified in place
+ * @param distributions output list, receives a copy of each candidate whose flags sum to occur
+ * @param level index in candidate at which to continue assigning flags
+ */
+ private void findDistributions(int occur, Integer[] candidate, List distributions, int level) {
+ if (level != candidate.length) {
+ for (int i = 0; i < 2; i++) {
+ candidate[level] = i;
+ // The sum covers the whole array, so flags left behind at deeper positions count as well.
+ int sum = 0;
+ for (int x = 0; x < candidate.length; x++)
+ sum += candidate[x];
+
+ if (sum == occur) {
+ distributions.add(candidate.clone());
+ } else {
+ findDistributions(occur, candidate, distributions, level + 1);
+ }
+ }
+ }
+ }
+
+
+ /**
+ * Maps the distribution of an R-group to all possible substitute combinations.
+ * This is best illustrated by an example.
+ * Say R2 occurs twice in the root, and has condition >0. So a valid
+ * output configuration can have either one or two substitutes.
+ * The distributions will have been calculated to be the following
+ * solutions: [0,1], [1,0], [1,1]
+ * To start with [1,1], assume two possible substitutes have been
+ * defined for R2, namely *C=O and *C-N. Then the distribution [1,1]
+ * should lead to four mappings:
+ * [*C=O,*C=O], [*C-N,*C-N], [*C=O,*C-N], [*C-N,*C=O].
+ * These mappings are generated in this function, as well as the other valid mappings
+ * for [0,1] and [1,0]:
+ * [*C=O,null], [*C-N,null], [null,*C=O], [null,*C-N].
+ * So the example would have this function produce eight mappings (result list size==8).
+ *
+ * @param rgpList the R-group definition supplying the candidate substitutes
+ * @param listOffset position in distribution that is filled in by this call
+ * @param distribution 0/1 flags; 1 means the R# atom at that position gets a substitute
+ * @param mapping working array with the substitute (or null) picked per position
+ * @param result output list, receives one list per completed mapping
+ */
+ private void mapSubstitutes(RGroupList rgpList, int listOffset, Integer[] distribution, RGroup[] mapping, List> result) {
+ if(listOffset==distribution.length) {
+ List mapped= new ArrayList();
+ for(RGroup rgrp : mapping)
+ mapped.add(rgrp);
+ result.add(mapped);
+ }
+ else {
+ if (distribution[listOffset]==0) {
+ mapping[listOffset]=null;
+ mapSubstitutes(rgpList, listOffset+1, distribution, mapping, result);
+ }
+ else {
+ for (RGroup rgrp :rgpList.getRGroups()) {
+ mapping[listOffset]=rgrp;
+ mapSubstitutes(rgpList, listOffset+1, distribution, mapping, result);
+ }
+ }
+ }
+ }
+
+
+    /**
+     * Locates an atom within a container; used while assembling a configuration.
+     *
+     * @param atom atom to look for (matched with equals)
+     * @param container container whose atoms are scanned in index order
+     * @return the array position of atom in container, or -1 if it is not there
+     */
+    private int getAtomPosition(IAtom atom, IAtomContainer container) {
+        int position = 0;
+        while (position < container.getAtomCount() && !atom.equals(container.getAtom(position))) {
+            position++;
+        }
+        return position < container.getAtomCount() ? position : -1;
+    }
+
+    /**
+     * Locates a bond within a container; used while assembling a configuration.
+     *
+     * @param bond bond to look for (matched with equals)
+     * @param container container whose bonds are scanned in index order
+     * @return the array position of the bond in the container, or -1 if it is not there
+     */
+    private int getBondPosition(IBond bond, IAtomContainer container) {
+        int position = 0;
+        while (position < container.getBondCount() && !bond.equals(container.getBond(position))) {
+            position++;
+        }
+        return position < container.getBondCount() ? position : -1;
+    }
+
+    /**
+     * Tells whether a distribution holds only zeroes, meaning no substitution is done for its R-group.
+     * @param arr flags to inspect
+     * @return true if arr's values are all zero.
+     */
+    private boolean allZeroArray(Integer[] arr) {
+        for (int idx = 0; idx < arr.length; idx++) {
+            if (arr[idx].intValue() != 0) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+ /**
+ * Checks whether IF..THEN conditions that can be set for the R-groups are met.
+ * It is used to filter away invalid configurations in {@link #findConfigurationsRecursively}.
+ *
+ * Scenario: suppose R1 is substituted 0 times, whereas R2 is substituted.
+ * Also suppose there is a condition IF R2 THEN R1. Because R1 does not
+ * occur but R2 does, the IF..THEN condition is not met: this function
+ * will return false, the configuration should be discarded.
+ * @param rGroupNumbers the R# numbers, in the same order as distributions
+ * @param distributions per R# group the 0/1 substitution flags; all zeroes means the group is not substituted
+ * @return true if all IF..THEN RGroup conditions are met.
+ */
+ private boolean checkIfThenConditionsMet(List rGroupNumbers, List distributions) {
+ for (int outer = 0; outer < rGroupNumbers.size(); outer++) {
+ int rgroupNum = rGroupNumbers.get(outer);
+ if (allZeroArray(distributions.get(outer))) {
+ for (int inner = 0; inner < rGroupNumbers.size(); inner++) {
+ int rgroupNum2 = rGroupNumbers.get(inner);
+ if (!allZeroArray(distributions.get(inner))) {
+ RGroupList rgrpList = rGroupDefinitions.get(rgroupNum2);
+ if (rgrpList.getRequiredRGroupNumber() == rgroupNum) {
+ logger.info(" Rejecting >> all 0 for " + rgroupNum + " but requirement found from " +
+ rgrpList.getRGroupNumber());
+ return false;
+ }
+ }
+ }
+ }
+ }
+ return true;
+ }
+
+ public int getAtomContainerCount() {
+ int retVal=0;
+ if(this.rootStructure!=null)
+ retVal++; // the root counts as one container
+ for(Integer r: rGroupDefinitions.keySet()) { // NOTE(review): no null guard on rGroupDefinitions; confirm it is always set first
+ for (RGroup rgrp : rGroupDefinitions.get(r).getRGroups()) { // NOTE(review): getRGroups() is null until setRGroups is called
+ if (rgrp.getGroup()!=null) {
+ retVal++; // substituents without a structure are not counted
+ }
+ }
+ }
+ return retVal;
+ }
+
+
+ public List getSubstituents() {
+ List substitutes = new ArrayList();
+ for(Integer r : rGroupDefinitions.keySet()) { // NOTE(review): no null guard on rGroupDefinitions; confirm it is always set first
+ for (RGroup rgrp : rGroupDefinitions.get(r).getRGroups()) { // NOTE(review): getRGroups() is null until setRGroups is called
+ IAtomContainer subst =rgrp.getGroup();
+ if (subst!=null) // substituents without a structure are left out
+ substitutes.add(subst);
+ }
+ }
+ return substitutes;
+ }
+
+ public void setRootStructure(IAtomContainer rootStructure) {
+ this.rootStructure = rootStructure;
+ }
+
+ public IAtomContainer getRootStructure() {
+ return rootStructure;
+ }
+
+ public void setRootAttachmentPoints(Map> rootAttachmentPoints) {
+ this.rootAttachmentPoints = rootAttachmentPoints;
+ }
+
+ public Map> getRootAttachmentPoints() {
+ return rootAttachmentPoints;
+ }
+
+ public void setRGroupDefinitions(Map rGroupDefinitions) {
+ this.rGroupDefinitions = rGroupDefinitions;
+ }
+
+ public Map getRGroupDefinitions() {
+ return rGroupDefinitions;
+ }
+}
diff --git a/src/test/org/openscience/cdk/io/ChemObjectIOTest.java b/src/test/org/openscience/cdk/io/ChemObjectIOTest.java
index fee7e1b64ff..66da7b07885 100644
--- a/src/test/org/openscience/cdk/io/ChemObjectIOTest.java
+++ b/src/test/org/openscience/cdk/io/ChemObjectIOTest.java
@@ -39,6 +39,7 @@
import org.openscience.cdk.io.formats.IResourceFormat;
import org.openscience.cdk.io.listener.IChemObjectIOListener;
import org.openscience.cdk.io.setting.IOSetting;
+import org.openscience.cdk.isomorphism.matchers.RGroupQuery;
import org.openscience.cdk.nonotify.NNAtomContainer;
import org.openscience.cdk.nonotify.NNAtomContainerSet;
import org.openscience.cdk.nonotify.NNChemFile;
@@ -107,7 +108,7 @@ public static void setChemObjectIO(IChemObjectIO aChemObjectIO) {
protected static IChemObject[] acceptableChemObjects = {
new ChemFile(), new ChemModel(), new Molecule(),
- new Reaction()
+ new Reaction(), new RGroupQuery()
};
@Test public void testAcceptsAtLeastOneChemObject() {
diff --git a/src/test/org/openscience/cdk/io/RGroupQueryReaderTest.java b/src/test/org/openscience/cdk/io/RGroupQueryReaderTest.java
new file mode 100644
index 00000000000..48660541cde
--- /dev/null
+++ b/src/test/org/openscience/cdk/io/RGroupQueryReaderTest.java
@@ -0,0 +1,394 @@
+/*
+ * Copyright (C) 2010 Mark Rijnbeek
+ *
+ * Contact: cdk-devel@lists.sourceforge.net
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ * All we ask is that proper credit is given for our work, which includes
+ * - but is not limited to - adding the above copyright notice to the beginning
+ * of your source code files, and to any copyright notice that you may
+ * distribute with programs based on this work.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+package org.openscience.cdk.io;
+
+import java.io.InputStream;
+
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import org.openscience.cdk.CDKConstants;
+import org.openscience.cdk.Molecule;
+import org.openscience.cdk.PseudoAtom;
+import org.openscience.cdk.exception.CDKException;
+import org.openscience.cdk.interfaces.IAtom;
+import org.openscience.cdk.interfaces.IAtomContainer;
+import org.openscience.cdk.interfaces.IBond;
+import org.openscience.cdk.io.formats.IChemFormat;
+import org.openscience.cdk.io.formats.RGroupQueryFormat;
+import org.openscience.cdk.isomorphism.matchers.RGroup;
+import org.openscience.cdk.isomorphism.matchers.RGroupList;
+import org.openscience.cdk.isomorphism.matchers.RGroupQuery;
+import org.openscience.cdk.tools.ILoggingTool;
+import org.openscience.cdk.tools.LoggingToolFactory;
+
+
+/**
+ * JUnit tests for {@link org.openscience.cdk.io.RGroupQueryReader}.
+ * @cdk.module test-io
+ * @author Mark Rijnbeek
+ */
+public class RGroupQueryReaderTest extends SimpleChemObjectReaderTest {
+ public RGroupQueryReaderTest() { // nothing to initialise per instance; the reader under test is registered in setup()
+ }
+ private static ILoggingTool logger = LoggingToolFactory.createLoggingTool(RGroupQueryReaderTest.class);
+ /** Registers a fresh RGroupQueryReader and the sample file rgfile.1.mol via setSimpleChemObjectReader (presumably inherited). */
+ @BeforeClass
+ public static void setup() {
+ setSimpleChemObjectReader(new RGroupQueryReader(), "data/mdl/rgfile.1.mol");
+ }
+ /** Checks that the reader rejects Molecule as a target class and accepts RGroupQuery. */
+ @Test
+ public void testAccepts() {
+ RGroupQueryReader reader = new RGroupQueryReader();
+ Assert.assertFalse(reader.accepts(Molecule.class));
+ Assert.assertTrue(reader.accepts(RGroupQuery.class));
+ }
+ /** Intentionally empty and not a {@code @Test}; presumably overrides an inherited check to disable it - TODO confirm. */
+ public void testAcceptsAtLeastOneDebugObject() {
+ }
+ /** Intentionally empty and not a {@code @Test}; presumably overrides an inherited check to disable it - TODO confirm. */
+ public void testAcceptsAtLeastOneNonotifyObject() {
+ }
+
+     /**
+      * Checks that {@link FormatFactory} guesses {@link RGroupQueryFormat} from the content of an RGfile.
+      */
+     @Test
+     public void testRGFileFormat() throws Exception {
+         InputStream ins = this.getClass().getClassLoader().getResourceAsStream("data/mdl/rgfile.1.mol");
+         try {
+             IChemFormat format = new FormatFactory().guessFormat(ins);
+             Assert.assertEquals(RGroupQueryFormat.class, format.getClass());
+         } finally {
+             if (ins != null) ins.close(); // release the resource even when the assertion fails
+         }
+     }
+
+     /**
+      * Parses rgfile.1.mol, a simple R-group query file, and checks the root structure,
+      * the R1 attachment bond, the R-group list and the generated configurations.
+      */
+     @Test
+     public void testRgroupQueryFile1() throws Exception {
+         String filename = "data/mdl/rgfile.1.mol";
+         logger.info("Testing: " + filename);
+         InputStream ins = this.getClass().getClassLoader().getResourceAsStream(filename);
+         RGroupQueryReader reader = new RGroupQueryReader(ins);
+         RGroupQuery rGroupQuery = (RGroupQuery) reader.read(new RGroupQuery());
+         Assert.assertNotNull(rGroupQuery);
+         Assert.assertEquals(1, rGroupQuery.getRGroupDefinitions().size());
+         Assert.assertEquals(7, rGroupQuery.getRootStructure().getAtomCount());
+
+         for (IAtom at : rGroupQuery.getAllRgroupQueryAtoms()) {
+             if (at instanceof PseudoAtom) {
+                 Assert.assertEquals("R1", ((PseudoAtom) at).getLabel());
+                 Map<IAtom, Map<Integer, IBond>> rootApo = rGroupQuery.getRootAttachmentPoints();
+                 Map<Integer, IBond> apoBonds = rootApo.get(at);
+                 Assert.assertEquals(1, apoBonds.size());
+                 // Assert that the root attachment is the bond between R1 and P
+                 for (IBond bond : rGroupQuery.getRootStructure().bonds()) {
+                     if (bond.contains(at)) {
+                         Assert.assertEquals(bond, apoBonds.get(1));
+                         for (IAtom atInApo : bond.atoms()) {
+                             Assert.assertTrue(atInApo.getSymbol().equals("R") || atInApo.getSymbol().equals("P"));
+                         }
+                     }
+                 }
+             }
+         }
+
+         Iterator<Integer> itr = rGroupQuery.getRGroupDefinitions().keySet().iterator();
+         int firstRGroupNumber = itr.next();
+         Assert.assertEquals(1, firstRGroupNumber);
+         RGroupList rList = rGroupQuery.getRGroupDefinitions().get(firstRGroupNumber);
+         Assert.assertEquals("0,1-3", rList.getOccurrence());
+
+         List<RGroup> rGroups = rList.getRGroups();
+         Assert.assertEquals("N", rGroups.get(0).getFirstAttachmentPoint().getSymbol());
+         Assert.assertEquals("O", rGroups.get(1).getFirstAttachmentPoint().getSymbol());
+         Assert.assertEquals("S", rGroups.get(2).getFirstAttachmentPoint().getSymbol());
+
+         Assert.assertNull(rGroups.get(0).getSecondAttachmentPoint());
+         Assert.assertNull(rGroups.get(1).getSecondAttachmentPoint());
+         Assert.assertNull(rGroups.get(2).getSecondAttachmentPoint());
+
+         List<IAtomContainer> configurations = rGroupQuery.getAllConfigurations();
+         Assert.assertEquals(4, configurations.size());
+
+         // RestH is true for R1, so in the configuration without substituents the phosphorus must carry REST_H = true.
+         boolean restHIdentified = false;
+         for (IAtomContainer atc : configurations) {
+             if (atc.getAtomCount() == 6) {
+                 for (IAtom atom : atc.atoms()) {
+                     if (atom.getSymbol().equals("P")) {
+                         Assert.assertNotNull(atom.getProperty(CDKConstants.REST_H));
+                         Assert.assertEquals(Boolean.TRUE, atom.getProperty(CDKConstants.REST_H));
+                         restHIdentified = true;
+                     }
+                 }
+             }
+         }
+         Assert.assertTrue(restHIdentified);
+     }
+
+
+     /**
+      * Parses rgfile.2.mol, a more elaborate R-group query file (R1, R2 and R11),
+      * and checks attachment points, R-group logic and REST_H flags.
+      */
+     @Test
+     public void testRgroupQueryFile2() throws Exception {
+         String filename = "data/mdl/rgfile.2.mol";
+         logger.info("Testing: " + filename);
+         InputStream ins = this.getClass().getClassLoader().getResourceAsStream(filename);
+         RGroupQueryReader reader = new RGroupQueryReader(ins);
+         RGroupQuery rGroupQuery = (RGroupQuery) reader.read(new RGroupQuery());
+         Assert.assertNotNull(rGroupQuery);
+         Assert.assertEquals(3, rGroupQuery.getRGroupDefinitions().size());
+         Assert.assertEquals(14, rGroupQuery.getRootStructure().getAtomCount());
+         Assert.assertEquals(4, rGroupQuery.getRootAttachmentPoints().size());
+
+         List<IAtom> rGroupQueryAtoms = rGroupQuery.getAllRgroupQueryAtoms();
+         Assert.assertEquals(4, rGroupQueryAtoms.size());
+
+         rGroupQueryAtoms = rGroupQuery.getRgroupQueryAtoms(1);
+         Assert.assertEquals(1, rGroupQueryAtoms.size());
+
+         for (IAtom at : rGroupQuery.getAllRgroupQueryAtoms()) {
+             if (at instanceof PseudoAtom) {
+                 Assert.assertTrue(RGroupQuery.isValidRgroupQueryLabel(((PseudoAtom) at).getLabel()));
+                 int rgroupNum = Integer.parseInt(((PseudoAtom) at).getLabel().substring(1)); // label is "R" + number
+                 Assert.assertTrue(rgroupNum == 1 || rgroupNum == 2 || rgroupNum == 11);
+                 switch (rgroupNum) {
+                     case 1:
+                     {
+                         // Test: R1 has two attachment points, defined by AAL
+                         Map<IAtom, Map<Integer, IBond>> rootApo = rGroupQuery.getRootAttachmentPoints();
+                         Map<Integer, IBond> apoBonds = rootApo.get(at);
+                         Assert.assertEquals(2, apoBonds.size());
+                         Assert.assertEquals("N", apoBonds.get(1).getConnectedAtom(at).getSymbol());
+                         Assert.assertEquals("C", apoBonds.get(2).getConnectedAtom(at).getSymbol());
+                         // Test: oxygens are the second attachment points for R1
+                         RGroupList rList = rGroupQuery.getRGroupDefinitions().get(1);
+                         Assert.assertEquals(2, rList.getRGroups().size());
+                         List<RGroup> rGroups = rList.getRGroups();
+                         Assert.assertEquals("O", rGroups.get(0).getSecondAttachmentPoint().getSymbol());
+                         Assert.assertEquals("O", rGroups.get(1).getSecondAttachmentPoint().getSymbol());
+                         Assert.assertFalse(rList.isRestH());
+                     }
+                     break;
+                     case 2:
+                     {
+                         RGroupList rList = rGroupQuery.getRGroupDefinitions().get(2);
+                         Assert.assertEquals(2, rList.getRGroups().size());
+                         Assert.assertEquals("0,2", rList.getOccurrence());
+                         Assert.assertEquals(11, rList.getRequiredRGroupNumber());
+                         Assert.assertFalse(rList.isRestH());
+                     }
+                     break;
+                     case 11:
+                     {
+                         RGroupList rList = rGroupQuery.getRGroupDefinitions().get(11);
+                         Assert.assertEquals(1, rList.getRGroups().size());
+                         Assert.assertEquals(0, rList.getRequiredRGroupNumber());
+                         Assert.assertTrue(rList.isRestH());
+
+                         List<RGroup> rGroups = rList.getRGroups();
+                         Assert.assertEquals("Pt", rGroups.get(0).getFirstAttachmentPoint().getSymbol());
+                         Assert.assertNull(rGroups.get(0).getSecondAttachmentPoint());
+                     }
+                     break;
+                 }
+             }
+         }
+
+         List<IAtomContainer> configurations = rGroupQuery.getAllConfigurations();
+         Assert.assertEquals(12, configurations.size());
+
+         // Test restH values
+         int countRestHForSmallestConfigurations = 0;
+         for (IAtomContainer atc : configurations) {
+             if (atc.getAtomCount() == 13) { // smallest configuration
+                 for (IAtom atom : atc.atoms()) {
+                     if (atom.getProperty(CDKConstants.REST_H) != null) {
+                         countRestHForSmallestConfigurations++;
+                         if (atom.getSymbol().equals("P"))
+                             Assert.assertEquals(Boolean.TRUE, atom.getProperty(CDKConstants.REST_H));
+                     }
+                 }
+             }
+         }
+         Assert.assertEquals(6, countRestHForSmallestConfigurations);
+
+     }
+
+     /**
+      * Parses rgfile.3.mol, in which R1 occurs twice in the root, each time with two
+      * attachment bonds, and checks that the AAL lines were interpreted correctly.
+      */
+     @Test
+     public void testRgroupQueryFile3() throws Exception {
+         String filename = "data/mdl/rgfile.3.mol";
+         logger.info("Testing: " + filename);
+         InputStream ins = this.getClass().getClassLoader().getResourceAsStream(filename);
+         RGroupQueryReader reader = new RGroupQueryReader(ins);
+         RGroupQuery rGroupQuery = (RGroupQuery) reader.read(new RGroupQuery());
+         Assert.assertNotNull(rGroupQuery);
+         Assert.assertEquals(1, rGroupQuery.getRGroupDefinitions().size());
+         Assert.assertEquals(10, rGroupQuery.getRootStructure().getAtomCount());
+         Assert.assertEquals(2, rGroupQuery.getRootAttachmentPoints().size());
+
+         Assert.assertEquals(8, rGroupQuery.getAllConfigurations().size());
+
+         // Test correctness of the AAL lines
+         for (IAtom at : rGroupQuery.getRgroupQueryAtoms(1)) {
+             if (at instanceof PseudoAtom) {
+                 Assert.assertEquals("R1", ((PseudoAtom) at).getLabel());
+
+                 Map<Integer, IBond> apoBonds = rGroupQuery.getRootAttachmentPoints().get(at);
+                 Assert.assertEquals(2, apoBonds.size());
+
+                 IAtom boundAtom1 = apoBonds.get(1).getConnectedAtom(at);
+                 Assert.assertTrue(boundAtom1.getSymbol().equals("Te") || boundAtom1.getSymbol().equals("S"));
+
+                 IAtom boundAtom2 = apoBonds.get(2).getConnectedAtom(at);
+                 Assert.assertTrue(boundAtom2.getSymbol().equals("Po") || boundAtom2.getSymbol().equals("O"));
+             }
+         }
+
+         // Test that there are only two R-group query atoms (R#). The third R is a
+         // pseudo atom, but because it is not numbered it is not part of any
+         // query condition.
+         List<IAtom> allrGroupQueryAtoms = rGroupQuery.getAllRgroupQueryAtoms();
+         Assert.assertEquals(2, allrGroupQueryAtoms.size());
+     }
+
+     /**
+      * Test parsing of RGFile rgfile.4.mol.
+      * This R-group query has its R# atom detached: it has no bonds.
+      */
+     @Test
+     public void testRgroupQueryFile4() throws Exception {
+         String filename = "data/mdl/rgfile.4.mol";
+         logger.info("Testing: " + filename);
+         InputStream ins = this.getClass().getClassLoader().getResourceAsStream(filename);
+         RGroupQueryReader reader = new RGroupQueryReader(ins);
+         RGroupQuery rGroupQuery = (RGroupQuery) reader.read(new RGroupQuery());
+         Assert.assertNotNull(rGroupQuery);
+         Assert.assertEquals(1, rGroupQuery.getRGroupDefinitions().size());
+         Assert.assertEquals(6, rGroupQuery.getRootStructure().getAtomCount());
+
+         List<IAtom> allrGroupQueryAtoms = rGroupQuery.getAllRgroupQueryAtoms();
+         Assert.assertEquals(1, allrGroupQueryAtoms.size());
+         RGroupList rList = rGroupQuery.getRGroupDefinitions().get(1);
+         Assert.assertEquals(2, rList.getRGroups().size());
+         Assert.assertEquals(0, rList.getRequiredRGroupNumber());
+         Assert.assertFalse(rList.isRestH());
+         Assert.assertEquals(0, rGroupQuery.getRootAttachmentPoints().size());
+         Assert.assertTrue(rGroupQuery.areSubstituentsDefined());
+
+         Assert.assertEquals(2, rGroupQuery.getAllConfigurations().size());
+
+         // This query has a detached R-group, test for empty attachment points
+         List<RGroup> rGroups = rList.getRGroups();
+         Assert.assertNull(rGroups.get(0).getFirstAttachmentPoint());
+         Assert.assertNull(rGroups.get(0).getSecondAttachmentPoint());
+         Assert.assertNull(rGroups.get(1).getFirstAttachmentPoint());
+         Assert.assertNull(rGroups.get(1).getSecondAttachmentPoint());
+     }
+
+
+     /**
+      * Test parsing of RGFile rgfile.5.mol.
+      * This exotic R-group query file has many R# groups and substituents,
+      * and mainly tests that all valid configurations are generated.
+      */
+     @Test
+     public void testRgroupQueryFile5() throws Exception {
+         String filename = "data/mdl/rgfile.5.mol";
+         logger.info("Testing: " + filename);
+         InputStream ins = this.getClass().getClassLoader().getResourceAsStream(filename);
+         RGroupQueryReader reader = new RGroupQueryReader(ins);
+         RGroupQuery rGroupQuery = (RGroupQuery) reader.read(new RGroupQuery());
+         Assert.assertNotNull(rGroupQuery);
+         Assert.assertEquals(4, rGroupQuery.getRGroupDefinitions().size());
+
+         // Test combinatorial explosion: R5 has many different configurations
+         Assert.assertEquals(17820, rGroupQuery.getAllConfigurations().size());
+     }
+
+     /**
+      * Test parsing of RGFile rgfile.6.mol.
+      * This RGFile is incomplete, RGP lines are missing. Reading it must still succeed
+      * (Symyx/ChemAxon software accepts it too); only enumerating configurations fails.
+      */
+     @Test
+     public void testRgroupQueryFile6() throws Exception {
+         String filename = "data/mdl/rgfile.6.mol";
+         logger.info("Testing: " + filename);
+         InputStream ins = this.getClass().getClassLoader().getResourceAsStream(filename);
+         RGroupQueryReader reader = new RGroupQueryReader(ins);
+         RGroupQuery rGroupQuery = (RGroupQuery) reader.read(new RGroupQuery());
+         Assert.assertNotNull(rGroupQuery);
+         Assert.assertEquals(3, rGroupQuery.getRGroupDefinitions().size());
+         Assert.assertEquals(14, rGroupQuery.getRootStructure().getAtomCount());
+
+         // This file has missing $RGP blocks, so not all substituents are defined.
+         Assert.assertFalse(rGroupQuery.areSubstituentsDefined());
+         // Expect the exception only here, so that a CDKException thrown by read() cannot pass the test.
+         try {
+             rGroupQuery.getAllConfigurations();
+             Assert.fail("getAllConfigurations() should raise a CDKException when not all groups were set");
+         } catch (CDKException expected) { /* expected: not all groups were set */ }
+     }
+
+     /**
+      * Test parsing of RGFile rgfile.7.mol.
+      * This RGFile has APO lines with value 3: both attachment points.
+      *
+      * Also, R32 appears twice, but with different numbers of attachments.
+      * The parser should not trip over this, and should still build the configurations.
+      */
+     @Test
+     public void testRgroupQueryFile7() throws Exception {
+         String filename = "data/mdl/rgfile.7.mol";
+         logger.info("Testing: " + filename);
+         InputStream ins = this.getClass().getClassLoader().getResourceAsStream(filename);
+         RGroupQueryReader reader = new RGroupQueryReader(ins);
+         RGroupQuery rGroupQuery = (RGroupQuery) reader.read(new RGroupQuery());
+         Assert.assertNotNull(rGroupQuery);
+         Assert.assertEquals(1, rGroupQuery.getRGroupDefinitions().size());
+         Assert.assertEquals(9, rGroupQuery.getRootStructure().getAtomCount());
+         Assert.assertEquals(20, rGroupQuery.getAllConfigurations().size());
+
+     }
+
+}
diff --git a/src/test/org/openscience/cdk/io/RGroupQueryWriterTest.java b/src/test/org/openscience/cdk/io/RGroupQueryWriterTest.java
new file mode 100644
index 00000000000..f232a53348b
--- /dev/null
+++ b/src/test/org/openscience/cdk/io/RGroupQueryWriterTest.java
@@ -0,0 +1,156 @@
+/*
+ * Copyright (C) 2010 Mark Rijnbeek
+ * Contact: cdk-devel@lists.sourceforge.net
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ * All we ask is that proper credit is given for our work, which includes
+ * - but is not limited to - adding the above copyright notice to the beginning
+ * of your source code files, and to any copyright notice that you may
+ * distribute with programs based on this work.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+package org.openscience.cdk.io;
+
+import java.io.InputStream;
+import java.io.StringWriter;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.openscience.cdk.DefaultChemObjectBuilder;
+import org.openscience.cdk.exception.CDKException;
+import org.openscience.cdk.interfaces.IChemObjectBuilder;
+import org.openscience.cdk.isomorphism.matchers.RGroupQuery;
+
+/**
+ * JUnit tests for {@link org.openscience.cdk.io.RGroupQueryWriter}.
+ * Idea: read the test RGfiles into an object model, then write the
+ * same model out as an RGfile again without changing anything. Then
+ * check that the original input file and the output file have the same content.
+ *
+ * @cdk.module test-io
+ * @author Mark Rijnbeek
+ */
+public class RGroupQueryWriterTest extends ChemObjectIOTest {
+
+ private static IChemObjectBuilder builder;
+ /** Stores the default chem object builder and registers a fresh RGroupQueryWriter via setChemObjectIO. */
+ @BeforeClass public static void setup() {
+ builder = DefaultChemObjectBuilder.getInstance();
+ setChemObjectIO(new RGroupQueryWriter());
+ }
+
+ /** Re-creates rgfile.1.mol via recreate() and checks the AAL/LOG/APO counts, the LOG line and the total line count. */
+ @Test
+ public void testRgroupQueryFile_1() throws Exception {
+ String rgFile =recreate("data/mdl/rgfile.1.mol");
+
+ Assert.assertEquals("AAL lines", 0, countSubstring("AAL",rgFile));
+ Assert.assertEquals("LOG lines", 1, countSubstring("LOG",rgFile));
+ Assert.assertEquals("APO lines", 3, countSubstring("APO",rgFile));
+ Assert.assertTrue (rgFile.contains("M LOG 1 1 0 1 0,1-3"));
+ Assert.assertEquals("Total #lines", 59, countSubstring("\n",rgFile));
+ }
+
+ /** Re-creates rgfile.2.mol via recreate() and checks the AAL/LOG/APO counts, the RGP line and the total line count. */
+ @Test
+ public void testRgroupQueryFile_2() throws Exception {
+ String rgFile =recreate("data/mdl/rgfile.2.mol");
+
+ Assert.assertEquals("AAL lines", 1, countSubstring("AAL",rgFile));
+ Assert.assertEquals("LOG lines", 3, countSubstring("LOG",rgFile));
+ Assert.assertEquals("APO lines", 5, countSubstring("APO",rgFile));
+ Assert.assertTrue (rgFile.contains("M RGP 4 1 11 2 2 3 2 4 1"));
+ Assert.assertEquals("Total #lines", 107, countSubstring("\n",rgFile));
+ }
+ /** Re-creates rgfile.3.mol via recreate() and checks the AAL/LOG/APO counts, the total line count and the RGP line. */
+ @Test
+ public void testRgroupQueryFile_3() throws Exception {
+ String rgFile =recreate("data/mdl/rgfile.3.mol");
+ Assert.assertEquals("AAL lines", 2, countSubstring("AAL",rgFile));
+ Assert.assertEquals("LOG lines", 1, countSubstring("LOG",rgFile));
+ Assert.assertEquals("APO lines", 2, countSubstring("APO",rgFile));
+ Assert.assertEquals("Total #lines", 66, countSubstring("\n",rgFile));
+ Assert.assertTrue (rgFile.contains("M RGP 2 5 1 7 1"));
+ }
+ /** Re-creates rgfile.4.mol (detached R-group) and checks the AAL/$CTAB/APO counts, the total line count and the RGP line. */
+ @Test
+ public void testRgroupQueryFile_4() throws Exception {
+ String rgFile =recreate("data/mdl/rgfile.4.mol");
+ Assert.assertEquals("AAL lines", 0, countSubstring("AAL",rgFile));
+ Assert.assertEquals("\\$CTAB lines", 3, countSubstring("\\$CTAB",rgFile));
+ // the R-group is detached, so we don't write APO lines (unlike the 0-valued APO in the input file)
+ Assert.assertEquals("APO lines", 0, countSubstring("APO",rgFile));
+ Assert.assertEquals("Total #lines", 46, countSubstring("\n",rgFile));
+ Assert.assertTrue (rgFile.contains("M RGP 1 6 1"));
+ }
+ /** Re-creates rgfile.5.mol via recreate() and checks the LOG/APO/"M RGP" counts and the total line count. */
+ @Test
+ public void testRgroupQueryFile_5() throws Exception {
+ String rgFile =recreate("data/mdl/rgfile.5.mol");
+ Assert.assertEquals("LOG lines", 4, countSubstring("LOG",rgFile));
+ Assert.assertEquals("APO lines", 0, countSubstring("APO",rgFile));
+ Assert.assertEquals("M RGP lines", 2, countSubstring("M RGP",rgFile)); //overflow
+ Assert.assertEquals("Total #lines", 132, countSubstring("\n",rgFile));
+ }
+
+     /** Re-creates rgfile.6.mol via recreate() and checks the AAL/LOG/APO counts and the total line count. */
+     @Test
+     public void testRgroupQueryFile_6() throws Exception {
+         String rgFile = recreate("data/mdl/rgfile.6.mol");
+         Assert.assertEquals("AAL lines", 1, countSubstring("AAL", rgFile));
+         Assert.assertEquals("LOG lines", 3, countSubstring("LOG", rgFile));
+         Assert.assertEquals("APO lines", 1, countSubstring("APO", rgFile));
+         Assert.assertEquals("Total #lines", 57, countSubstring("\n", rgFile));
+     }
+
+     /** Re-creates rgfile.7.mol via recreate() and checks the LOG/APO counts, the RGP line and the total line count. */
+     @Test
+     public void testRgroupQueryFile_7() throws Exception {
+         String rgFile = recreate("data/mdl/rgfile.7.mol");
+         Assert.assertEquals("LOG lines", 1, countSubstring("LOG", rgFile));
+         Assert.assertEquals("APO lines", 2, countSubstring("APO", rgFile));
+         Assert.assertTrue(rgFile.contains("M RGP 3 4 32 6 32 7 32"));
+         Assert.assertEquals("Total #lines", 53, countSubstring("\n", rgFile));
+     }
+     /** Counts the matches of the regular expression {@code regExp} found in {@code text}. */
+     private int countSubstring (String regExp,String text) {
+         Matcher matcher = Pattern.compile(regExp).matcher(text);
+         int matches = 0;
+         while (matcher.find()) {
+             matches++;
+         }
+         return matches;
+     }
+ // Both methods are intentionally empty and not @Test; presumably they override inherited checks to disable them - TODO confirm.
+ public void testAcceptsAtLeastOneDebugObject() {}
+ public void testAcceptsAtLeastOneNonotifyObject() {}
+
+     /**
+      * Reads the RGfile resource {@code file} into an RGroupQuery and returns it written out again by RGroupQueryWriter.
+      */
+     private String recreate(String file) throws CDKException {
+         InputStream resource = this.getClass().getClassLoader().getResourceAsStream(file);
+         RGroupQuery query = (RGroupQuery) new RGroupQueryReader(resource).read(new RGroupQuery());
+         StringWriter buffer = new StringWriter();
+         RGroupQueryWriter writer = new RGroupQueryWriter(buffer);
+         writer.write(query);
+         return buffer.toString();
+     }
+
+
+}
diff --git a/src/test/org/openscience/cdk/io/formats/RGroupQueryFormatTest.java b/src/test/org/openscience/cdk/io/formats/RGroupQueryFormatTest.java
new file mode 100644
index 00000000000..e2c52ca57d4
--- /dev/null
+++ b/src/test/org/openscience/cdk/io/formats/RGroupQueryFormatTest.java
@@ -0,0 +1,34 @@
+/*
+ * Copyright (C) 2010 Mark Rijnbeek
+ *
+ * Contact: cdk-devel@lists.sourceforge.net
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ * All we ask is that proper credit is given for our work, which includes
+ * - but is not limited to - adding the above copyright notice to the beginning
+ * of your source code files, and to any copyright notice that you may
+ * distribute with programs based on this work.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+package org.openscience.cdk.io.formats;
+ /**
+ * Configures the ChemFormatMatcherTest base class with the instance returned by RGroupQueryFormat.getInstance().
+ * @cdk.module test-ioformats
+ */
+ public class RGroupQueryFormatTest extends ChemFormatMatcherTest {
+ public RGroupQueryFormatTest() {
+ super.setChemFormatMatcher((IChemFormatMatcher)RGroupQueryFormat.getInstance());
+ }
+ }
diff --git a/src/test/org/openscience/cdk/isomorphism/matchers/RGroupListTest.java b/src/test/org/openscience/cdk/isomorphism/matchers/RGroupListTest.java
new file mode 100644
index 00000000000..bae754db92f
--- /dev/null
+++ b/src/test/org/openscience/cdk/isomorphism/matchers/RGroupListTest.java
@@ -0,0 +1,80 @@
+/*
+ * Copyright (C) 2010 Mark Rijnbeek
+ *
+ * Contact: cdk-devel@lists.sourceforge.net
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ * All we ask is that proper credit is given for our work, which includes
+ * - but is not limited to - adding the above copyright notice to the beginning
+ * of your source code files, and to any copyright notice that you may
+ * distribute with programs based on this work.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+package org.openscience.cdk.isomorphism.matchers;
+
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import org.openscience.cdk.CDKTestCase;
+import org.openscience.cdk.exception.CDKException;
+
+
+/**
+ * Checks the functionality of the {@link org.openscience.cdk.isomorphism.matchers.RGroupList},
+ * in particular setting valid 'occurrence' strings.
+ *
+ * @cdk.module test-extra
+ */
+public class RGroupListTest extends CDKTestCase {
+
+ @BeforeClass
+ public static void setUp() {
+ }
+
+ @Test
+ public void testOccurrenceCorrect() throws CDKException {
+ RGroupList rgrLst = new RGroupList(1);
+ rgrLst.setOccurrence("1, 3-7, 9, >11");
+ Assert.assertEquals(rgrLst.getOccurrence(), "1,3-7,9,>11");
+ }
+
+ @Test
+ public void testOccurrenceNull() throws CDKException{
+ RGroupList rgrLst = new RGroupList(1);
+ rgrLst.setOccurrence(null);
+ Assert.assertEquals(rgrLst.getOccurrence(), RGroupList.DEFAULT_OCCURRENCE);
+ }
+
+ @Test (expected = CDKException.class)
+ public void testOccurrenceNumericValues() throws CDKException{
+ RGroupList rgrLst = new RGroupList(1);
+ rgrLst.setOccurrence("a,3,10");
+ }
+
+ @Test (expected = CDKException.class)
+ public void testOccurrenceNoNegativeNumber() throws CDKException{
+ RGroupList rgrLst = new RGroupList(1);
+ rgrLst.setOccurrence("-10");
+ }
+
+ @Test (expected = CDKException.class)
+ public void testOccurrenceNotSmallerThanZero() throws CDKException{
+ RGroupList rgrLst = new RGroupList(1);
+ rgrLst.setOccurrence("<0");
+ }
+
+
+}