diff --git a/src/main/org/openscience/cdk/smsd/algorithm/cdk/CDKRMapHandler.java b/src/main/org/openscience/cdk/smsd/algorithm/cdk/CDKRMapHandler.java index 5a47d86a298..6bd4c394e12 100644 --- a/src/main/org/openscience/cdk/smsd/algorithm/cdk/CDKRMapHandler.java +++ b/src/main/org/openscience/cdk/smsd/algorithm/cdk/CDKRMapHandler.java @@ -24,6 +24,7 @@ import java.util.ArrayList; import java.util.BitSet; +import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Stack; @@ -33,6 +34,8 @@ import org.openscience.cdk.smsd.helper.FinalMappings; import org.openscience.cdk.interfaces.IAtom; import org.openscience.cdk.interfaces.IAtomContainer; +import org.openscience.cdk.interfaces.IBond; +import org.openscience.cdk.tools.manipulator.BondManipulator; /** * This algorithm derives from the algorithm described in @@ -80,7 +83,6 @@ public static IAtomContainer getTarget() { public static void setTarget(IAtomContainer aTarget) { target = aTarget; } - private List> mappings; private static IAtomContainer source; private static IAtomContainer target; @@ -119,7 +121,7 @@ public void calculateOverlapsAndReduce(IAtomContainer Molecule1, IAtomContainer Stack> allMaxOverlaps = getAllMaximum(reducedList); while (!allMaxOverlaps.empty()) { // System.out.println("source: " + source.getAtomCount() + ", target: " + target.getAtomCount() + ", overl: " + allMaxOverlaps.peek().size()); - List maxOverlapsAtoms = CDKMCS.makeAtomsMapOfBondsMap(allMaxOverlaps.peek(), getSource(), getTarget()); + List> maxOverlapsAtoms = makeAtomsMapOfBondsMap(allMaxOverlaps.peek(), getSource(), getTarget()); // System.out.println("size of maxOverlaps: " + maxOverlapsAtoms.size()); identifyMatchedParts(maxOverlapsAtoms, getSource(), getTarget()); // identifyMatchedParts(allMaxOverlaps.peek(), source, target); @@ -166,9 +168,8 @@ public void calculateOverlapsAndReduceExactMatch(IAtomContainer Molecule1, IAtom Stack> allMaxOverlaps = getAllMaximum(reducedList); while 
(!allMaxOverlaps.empty()) { - List maxOverlapsAtoms = CDKMCS.makeAtomsMapOfBondsMap(allMaxOverlaps.peek(), getSource(), getTarget()); + List> maxOverlapsAtoms = makeAtomsMapOfBondsMap(allMaxOverlaps.peek(), getSource(), getTarget()); identifyMatchedParts(maxOverlapsAtoms, getSource(), getTarget()); - allMaxOverlaps.pop(); } } @@ -219,64 +220,142 @@ protected List removeRedundantMappingsForSingleAtomCase(List o } -// /** -// * This makes sourceAtom map of matching atoms out of sourceAtom map of matching bonds as produced by the get(Subgraph|Ismorphism)Map methods. -// * -// * @param rMapList The list produced by the getMap method. -// * @param graph1 first molecule. Must not be an IQueryAtomContainer. -// * @param graph2 second molecule. May be an IQueryAtomContainer. -// * @return The mapping found projected on graph1. This is sourceAtom List of CDKRMap objects containing Ids of matching atoms. -// */ -// public static List makeAtomsMapOfBondsMap(List rMapList, IAtomContainer graph1, IAtomContainer graph2) { -// if (rMapList == null) { -// return (rMapList); -// } -// List result = new ArrayList(); -// for (int i = 0; i < rMapList.size(); i++) { -// IBond bond1 = graph1.getBond(rMapList.get(i).getId1()); -// IBond bond2 = graph2.getBond(rMapList.get(i).getId2()); -// IAtom[] atom1 = BondManipulator.getAtomArray(bond1); -// IAtom[] atom2 = BondManipulator.getAtomArray(bond2); -// for (int j = 0; j < 2; j++) { -// List bondsConnectedToAtom1j = graph1.getConnectedBondsList(atom1[j]); -// for (int k = 0; k < bondsConnectedToAtom1j.size(); k++) { -// if (bondsConnectedToAtom1j.get(k) != bond1) { -// IBond testBond = bondsConnectedToAtom1j.get(k); -// for (int m = 0; m < rMapList.size(); m++) { -// IBond testBond2; -// if ((rMapList.get(m)).getId1() == graph1.getBondNumber(testBond)) { -// testBond2 = graph2.getBond((rMapList.get(m)).getId2()); -// for (int n = 0; n < 2; n++) { -// List bondsToTest = graph2.getConnectedBondsList(atom2[n]); -// if 
(bondsToTest.contains(testBond2)) { -// CDKRMap map; -// if (j == n) { -// map = new CDKRMap(graph1.getAtomNumber(atom1[0]), graph2.getAtomNumber(atom2[0])); -// } else { -// map = new CDKRMap(graph1.getAtomNumber(atom1[1]), graph2.getAtomNumber(atom2[0])); -// } -// if (!result.contains(map)) { -// result.add(map); -// } -// CDKRMap map2; -// if (j == n) { -// map2 = new CDKRMap(graph1.getAtomNumber(atom1[1]), graph2.getAtomNumber(atom2[1])); -// } else { -// map2 = new CDKRMap(graph1.getAtomNumber(atom1[0]), graph2.getAtomNumber(atom2[1])); -// } -// if (!result.contains(map2)) { -// result.add(map2); -// } -// } -// } -// } -// } -// } -// } -// } -// } -// return result; -// } + /** + * This makes a map of matching atoms out of a map of matching bonds as produced by the get(Subgraph|Isomorphism)Map methods. A single-entry list is delegated to makeAtomsMapOfBondsMapSingleBond; for any other list each mapped bond is oriented by looking for a neighbouring bond in graph1 that is also mapped and whose image in graph2 touches one of the target bond's atoms. + * + * @param rMapList The list produced by the getMap method. + * @param graph1 first molecule. Must not be an IQueryAtomContainer. + * @param graph2 second molecule. May be an IQueryAtomContainer. + * @return The mappings found projected on graph1, or null if rMapList is null. This is a List of alternative mappings, each of them a List of CDKRMap objects containing Ids of matching atoms. 
+ */ + public static List> makeAtomsMapOfBondsMap(List rMapList, IAtomContainer graph1, IAtomContainer graph2) { + if (rMapList == null) { + return (null); + } + List> result = null; + if (rMapList.size() == 1) { + result = makeAtomsMapOfBondsMapSingleBond(rMapList, graph1, graph2); + } else { + List resultLocal = new ArrayList(); + for (int i = 0; i < rMapList.size(); i++) { + IBond qBond = graph1.getBond(rMapList.get(i).getId1()); + IBond tBond = graph2.getBond(rMapList.get(i).getId2()); + IAtom[] qAtoms = BondManipulator.getAtomArray(qBond); + IAtom[] tAtoms = BondManipulator.getAtomArray(tBond); + for (int j = 0; j < 2; j++) { + List bondsConnectedToAtom1j = graph1.getConnectedBondsList(qAtoms[j]); + for (int k = 0; k < bondsConnectedToAtom1j.size(); k++) { + if (bondsConnectedToAtom1j.get(k) != qBond) { + IBond testBond = bondsConnectedToAtom1j.get(k); + for (int m = 0; m < rMapList.size(); m++) { + IBond testBond2; + if ((rMapList.get(m)).getId1() == graph1.getBondNumber(testBond)) { + testBond2 = graph2.getBond((rMapList.get(m)).getId2()); + for (int n = 0; n < 2; n++) { + List bondsToTest = graph2.getConnectedBondsList(tAtoms[n]); + if (bondsToTest.contains(testBond2)) { + CDKRMap map; + if (j == n) { + map = new CDKRMap(graph1.getAtomNumber(qAtoms[0]), graph2.getAtomNumber(tAtoms[0])); + } else { + map = new CDKRMap(graph1.getAtomNumber(qAtoms[1]), graph2.getAtomNumber(tAtoms[0])); + } + if (!resultLocal.contains(map)) { + resultLocal.add(map); + } + CDKRMap map2; + if (j == n) { + map2 = new CDKRMap(graph1.getAtomNumber(qAtoms[1]), graph2.getAtomNumber(tAtoms[1])); + } else { + map2 = new CDKRMap(graph1.getAtomNumber(qAtoms[0]), graph2.getAtomNumber(tAtoms[1])); + } + if (!resultLocal.contains(map2)) { + resultLocal.add(map2); + } + } + } + } + } + } + } + } + } + result = new ArrayList>(); + result.add(resultLocal); + } + return result; + } + + /** + * This makes an atom map of matching atoms out of an atom map of matching bonds as produced by the 
get(Subgraph|Isomorphism)Map methods. + * Note: added by Asad since the CDK one doesn't pick up the correct changes. + * @param list The list produced by the getMap method. + * @param sourceGraph first molecule. Must not be an IQueryAtomContainer. + * @param targetGraph second molecule. May be an IQueryAtomContainer. + * @return The candidate mappings projected on sourceGraph, or null if list is null. Each candidate is a List of CDKRMap objects containing Ids of matching atoms: one pairs the atoms of each mapped bond in the same order (atom 0 with atom 0, atom 1 with atom 1), the other pairs them swapped, and a pairing is only recorded when the element symbols of both atom pairs are equal. The longer candidate is returned alone; when both are equally long, both are returned. + */ + public static List> makeAtomsMapOfBondsMapSingleBond(List list, IAtomContainer sourceGraph, IAtomContainer targetGraph) { + if (list == null) { + return null; + } + Map bondMap = new HashMap(list.size()); + for (CDKRMap solBondMap : list) { + int id1 = solBondMap.getId1(); + int id2 = solBondMap.getId2(); + IBond qBond = sourceGraph.getBond(id1); + IBond tBond = targetGraph.getBond(id2); + bondMap.put(qBond, tBond); + } + List result1 = new ArrayList(); + List result2 = new ArrayList(); + for (IBond qbond : sourceGraph.bonds()) { + if (bondMap.containsKey(qbond)) { + IBond tbond = bondMap.get(qbond); + CDKRMap map00 = null; + CDKRMap map01 = null; + CDKRMap map10 = null; + CDKRMap map11 = null; + + if ((qbond.getAtom(0).getSymbol().equals(tbond.getAtom(0).getSymbol())) + && (qbond.getAtom(1).getSymbol().equals(tbond.getAtom(1).getSymbol()))) { + map00 = new CDKRMap(sourceGraph.getAtomNumber(qbond.getAtom(0)), + targetGraph.getAtomNumber(tbond.getAtom(0))); + map11 = new CDKRMap(sourceGraph.getAtomNumber(qbond.getAtom(1)), + targetGraph.getAtomNumber(tbond.getAtom(1))); + if (!result1.contains(map00)) { + result1.add(map00); + } + if (!result1.contains(map11)) { + result1.add(map11); + } + } + if ((qbond.getAtom(0).getSymbol().equals(tbond.getAtom(1).getSymbol())) + && (qbond.getAtom(1).getSymbol().equals(tbond.getAtom(0).getSymbol()))) { + map01 = new CDKRMap(sourceGraph.getAtomNumber(qbond.getAtom(0)), + targetGraph.getAtomNumber(tbond.getAtom(1))); + map10 = new CDKRMap(sourceGraph.getAtomNumber(qbond.getAtom(1)), + 
targetGraph.getAtomNumber(tbond.getAtom(0))); + if (!result2.contains(map01)) { + result2.add(map01); + } + if (!result2.contains(map10)) { + result2.add(map10); + } + } + } + } + List> result = new ArrayList>(); + if (result1.size() == result2.size()) { + result.add(result1); + result.add(result2); + } else if (result1.size() > result2.size()) { + result.add(result1); + } else { + result.add(result2); + } + return result; + } + /** * * @param overlaps @@ -338,33 +417,32 @@ protected Stack> getAllMaximum(List> overlaps) { * @param source * @param target */ - protected void identifyMatchedParts(List list, IAtomContainer source, IAtomContainer target) { + protected void identifyMatchedParts(List> list, IAtomContainer source, IAtomContainer target) { List array1 = new ArrayList(); List array2 = new ArrayList(); - TreeMap atomNumbersFromContainer = new TreeMap(); - /* * We have serial numbers of the bonds/Atoms to delete * Now we will collect the actual bond/Atoms rather than * serial number for deletion. 
RonP flag check whether Reactant is * mapped on Product or Vise Versa - * + * */ + for (List rMap : list) { + Map atomNumbersFromContainer = new TreeMap(); + for (CDKRMap rmap : rMap) { + IAtom sourceAtom = source.getAtom(rmap.getId1()); + IAtom targetAtom = target.getAtom(rmap.getId2()); - for (CDKRMap rmap : list) { - IAtom sourceAtom = source.getAtom(rmap.getId1()); - IAtom targetAtom = target.getAtom(rmap.getId2()); + array1.add(sourceAtom); + array2.add(targetAtom); - array1.add(sourceAtom); - array2.add(targetAtom); - - int IndexI = source.getAtomNumber(sourceAtom); - int IndexJ = target.getAtomNumber(targetAtom); - - atomNumbersFromContainer.put(IndexI, IndexJ); + int IndexI = source.getAtomNumber(sourceAtom); + int IndexJ = target.getAtomNumber(targetAtom); + atomNumbersFromContainer.put(IndexI, IndexJ); + } /*Added the Mapping Numbers to the FinalMapping* */ getMappings().add(atomNumbersFromContainer); diff --git a/src/test/org/openscience/cdk/smsd/SMSDTest.java b/src/test/org/openscience/cdk/smsd/SMSDTest.java index e96fe77e580..f8d15e28ca5 100644 --- a/src/test/org/openscience/cdk/smsd/SMSDTest.java +++ b/src/test/org/openscience/cdk/smsd/SMSDTest.java @@ -125,11 +125,12 @@ public void testSearchMCS() throws CDKException { SmilesParser sp = new SmilesParser(DefaultChemObjectBuilder.getInstance()); IAtomContainer target = null; target = sp.parseSmiles("C\\C=C/Nc1cccc(c1)N(O)\\C=C\\C\\C=C\\C=C/C"); - IAtomContainer queryac = null; - queryac = sp.parseSmiles("Nc1ccccc1"); + IAtomContainer queryac = sp.parseSmiles("Nc1ccccc1"); SMSD smsd1 = new SMSD(Algorithm.DEFAULT, true); smsd1.init(queryac, target, true); smsd1.setChemFilters(true, true, true); + Assert.assertEquals(7, smsd1.getFirstAtomMapping().size()); + Assert.assertEquals(2, smsd1.getAllAtomMapping().size()); assertNotNull(smsd1.getFirstMapping()); } catch (InvalidSmilesException ex) { Logger.getLogger(MCSPlusHandlerTest.class.getName()).log(Level.SEVERE, null, ex); @@ -529,7 +530,23 @@ public void 
testQueryAtomContainerSubstructure() throws CDKException { Assert.assertTrue(foundMatches); } - @Test + public void testQueryAtomCount() throws CDKException { /* NOTE(review): this hunk removes the @Test line and adds none back, so after the patch neither this method nor testMatchCount below carries @Test - confirm that disabling both is intended */ + SMSD smsd = new SMSD(Algorithm.DEFAULT, true); + SmilesParser sp = new SmilesParser(DefaultChemObjectBuilder.getInstance()); + IAtomContainer query = sp.parseSmiles("CC"); + IAtomContainer target = sp.parseSmiles("C1CCC12CCCC2"); + + smsd.init(query, target, false); + boolean foundMatches = smsd.isSubgraph(); + Assert.assertEquals(18, smsd.getAllAtomMapping().size()); + Assert.assertTrue(foundMatches); + + IQueryAtomContainer queryContainer = QueryAtomContainerCreator.createSymbolAndBondOrderQueryContainer(query); + smsd.init(queryContainer, target, false); + foundMatches = smsd.isSubgraph(); + Assert.assertTrue(foundMatches); + } + public void testMatchCount() throws CDKException { SMSD smsd = new SMSD(Algorithm.DEFAULT, true); SmilesParser sp = new SmilesParser(DefaultChemObjectBuilder.getInstance()); @@ -547,6 +564,24 @@ public void testMatchCount() throws CDKException { Assert.assertTrue(foundMatches); } + @Test + public void testMatchCountCDKMCS() throws CDKException { /* NOTE(review): the statements of this test are identical to those added in testQueryAtomCount in the previous hunk - confirm the duplication is intended */ + SMSD smsd = new SMSD(Algorithm.DEFAULT, true); + SmilesParser sp = new SmilesParser(DefaultChemObjectBuilder.getInstance()); + IAtomContainer query = sp.parseSmiles("CC"); + IAtomContainer target = sp.parseSmiles("C1CCC12CCCC2"); + + smsd.init(query, target, false); + boolean foundMatches = smsd.isSubgraph(); + Assert.assertEquals(18, smsd.getAllAtomMapping().size()); + Assert.assertTrue(foundMatches); + + IQueryAtomContainer queryContainer = QueryAtomContainerCreator.createSymbolAndBondOrderQueryContainer(query); + smsd.init(queryContainer, target, false); + foundMatches = smsd.isSubgraph(); + Assert.assertTrue(foundMatches); + } + @Test public void testImpossibleQuery() throws CDKException { SMSD smsd = new SMSD(Algorithm.DEFAULT, true);