Skip to content

Commit

Permalink
XML matching rule
Browse files Browse the repository at this point in the history
  • Loading branch information
semancik committed Mar 4, 2016
1 parent f0c2bac commit a7295f4
Show file tree
Hide file tree
Showing 4 changed files with 213 additions and 13 deletions.
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2010-2015 Evolveum
* Copyright (c) 2010-2016 Evolveum
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -32,6 +32,7 @@ public static MatchingRuleRegistry createRegistry() {
registry.registerMatchingRule(new PolyStringNormMatchingRule());
registry.registerMatchingRule(new ExchangeEmailAddressesMatchingRule());
registry.registerMatchingRule(new DistinguishedNameMatchingRule());
registry.registerMatchingRule(new XmlMatchingRule());

return registry;
}
Expand Down
@@ -0,0 +1,107 @@
/*
* Copyright (c) 2016 Evolveum
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.evolveum.midpoint.prism.match;


import javax.xml.namespace.QName;

import org.apache.commons.lang.StringUtils;
import org.w3c.dom.Document;
import org.w3c.dom.Element;

import com.evolveum.midpoint.prism.PrismConstants;
import com.evolveum.midpoint.util.DOMUtil;
import com.evolveum.midpoint.util.logging.Trace;
import com.evolveum.midpoint.util.logging.TraceManager;

/**
 * String matching rule that compares strings as XML snippets.
 * The XML comparison is not schema aware. It will not handle
 * QNames in values correctly. The comparison ignores XML formatting
 * (whitespaces between elements).
 * <p>
 * Values that fail to parse (signalled by {@link IllegalStateException} or
 * {@link IllegalArgumentException} from the parser helper) are not rejected;
 * they are treated as plain strings instead.
 *
 * @author Radovan Semancik
 *
 */
public class XmlMatchingRule implements MatchingRule<String> {

    public static final Trace LOGGER = TraceManager.getTrace(XmlMatchingRule.class);

    public static final QName NAME = new QName(PrismConstants.NS_MATCHING_RULE, "xml");

    /**
     * @return the qualified name under which this rule is registered ({@link #NAME})
     */
    @Override
    public QName getName() {
        return NAME;
    }

    /**
     * @param xsdType type of the values to be matched
     * @return true only for {@code DOMUtil.XSD_STRING}
     */
    @Override
    public boolean isSupported(QName xsdType) {
        return (DOMUtil.XSD_STRING.equals(xsdType));
    }

    /**
     * Compares two values as XML documents, not considering namespace prefixes
     * and whitespaces.
     *
     * @param a first value, may be null
     * @param b second value, may be null
     * @return true if both values are null, if they are the same string, or if
     *         they parse to documents that {@code DOMUtil.compareDocument} reports as equal
     */
    @Override
    public boolean match(String a, String b) {
        // Identical strings (including two nulls) always match. Deciding this up front
        // avoids two needless XML parses and avoids logging a warning every time
        // two identical but unparseable values are compared.
        if (StringUtils.equals(a, b)) {
            return true;
        }
        if (a == null || b == null) {
            return false;
        }
        try {

            Document docA = DOMUtil.parseDocument(a);
            Document docB = DOMUtil.parseDocument(b);
            return DOMUtil.compareDocument(docA, docB, false, false);

        } catch (IllegalStateException | IllegalArgumentException e) {
            LOGGER.warn("Invalid XML in XML matching rule: {}", e.getMessage());
            // Invalid XML. We do not want to throw the exception from matching rule.
            // So fall back to ordinary string comparison. Equal strings were already
            // accepted above, therefore the string comparison can only fail here.
            return false;
        }
    }

    /**
     * Produces the normalized form of an XML value: the value is parsed,
     * normalized by {@code DOMUtil.normalize} without keeping whitespaces,
     * printed back to a string and trimmed.
     *
     * @param original value to normalize, may be null
     * @return null for null input; the trimmed re-serialized document for parseable
     *         input; the trimmed original string if the input cannot be parsed
     */
    @Override
    public String normalize(String original) {
        if (original == null) {
            return original;
        }
        try {

            Document doc = DOMUtil.parseDocument(original);
            DOMUtil.normalize(doc, false);
            String out = DOMUtil.printDom(doc, false, true).toString();
            return out.trim();

        } catch (IllegalStateException | IllegalArgumentException e) {
            LOGGER.warn("Invalid XML in XML matching rule: {}", e.getMessage());
            // Not parseable as XML: the best we can do is to trim the raw string.
            return original.trim();
        }
    }

    /**
     * Regular expression matching is not supported by this rule.
     *
     * @return always false (a warning is logged on every call)
     */
    @Override
    public boolean matchRegex(String a, String regex) {
        LOGGER.warn("Regular expression matching is not supported for XML data types");
        return false;
    }

}
Expand Up @@ -117,7 +117,50 @@ public void testPolyStringNorm() throws Exception {
assertMatch(rule, new PolyString("Bar", "bar"), new PolyString("bAR", "bar"));
assertNoMatch(rule, new PolyString("Bar", "bar"), new PolyString("Bar", "barbar"));
}

/**
 * Exercises the matching rule looked up under XmlMatchingRule.NAME for xsd:string
 * values: matching of valid XML, matching of unparseable input and normalization.
 */
@Test
public void testXml() throws Exception {
// GIVEN
MatchingRule<String> rule = matchingRuleRegistry.getMatchingRule(XmlMatchingRule.NAME,
DOMUtil.XSD_STRING);
// WHEN, THEN
// Single element: same text matches, different text does not; whitespace around
// the document and around the text content is expected to be ignored.
assertMatch(rule, "<foo>BAR</foo>", "<foo>BAR</foo>");
assertNoMatch(rule, "<foo>BAR</foo>", "<foo>BARbar</foo>");
assertMatch(rule, "<foo>BAR</foo>", " <foo>BAR</foo> ");
assertMatch(rule, "<foo>\n BAR\n</foo>", " <foo>BAR</foo> ");

// Nested elements: a different element name or missing text must not match;
// formatting whitespace, empty vs. self-closed elements, the XML declaration
// and comments are expected to make no difference.
assertMatch(rule, "<foo>FOO<bar>BAR</bar></foo>", "<foo>FOO<bar>BAR</bar></foo>");
assertNoMatch(rule, "<foo>FOO<bar>BAR</bar></foo>", "<foo>FOO<baZ>BAR</baZ></foo>");
assertNoMatch(rule, "<foo>FOO<bar>BAR</bar></foo>", "<foo><bar>BAR</bar></foo>");
assertMatch(rule, "<foo>FOO<bar>BAR</bar></foo>", "<foo>\n FOO\n <bar>BAR</bar>\n</foo>\n");
assertMatch(rule, "<foo>FOO<bar></bar></foo>", " <foo> FOO <bar/> </foo> ");
assertMatch(rule, "\n\n <foo> \n FOO <bar> </bar> \n </foo>", " <foo> FOO <bar/></foo> ");
assertNoMatch(rule, "\n\n <foo> \n FOO <bar> X </bar> \n </foo>", " <foo> FOO <bar/></foo> ");
assertMatch(rule, "<foo>FOO<bar>BAR</bar></foo>", "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n\n<foo>\n FOO\n <bar>BAR</bar>\n</foo>\n");
assertMatch(rule, "<foo> \n <!-- dada --> FOO <bar> </bar> \n </foo>", "<!-- bubu --> <foo> FOO <!-- he --> <bar/><!-- hihi --></foo> ");
assertMatch(rule, "<foo>FOO <bar/> \n </foo>", "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!-- blahblah ... AS IS ... blah -->\n<foo> FOO <!-- he --> <bar/><!-- hihi --></foo> ");

// Invalid XML
// (the <bar> element is never closed): such values are expected to match only
// when the raw strings are equal, and never to match a well-formed counterpart.
assertMatch(rule, "<foo>FOO<bar>BAR</foo>", "<foo>FOO<bar>BAR</foo>");
assertNoMatch(rule, "<foo>FOO<bar>BAR</foo>", "<foo>FOO<bar>BAR</bar></foo>");

// normalization
// NOTE(review): assertNormalized is defined outside this view; judging by the literals
// the first string is the expected normalized form and the second is the input - confirm.
assertNormalized(rule, "<foo>BAR</foo>", "<foo>BAR</foo>");
assertNormalized(rule, "<foo>BAR</foo>", " <foo> BAR </foo> ");
assertNormalized(rule, "<foo>BAR</foo>", "<foo>\n BAR\n</foo>");
assertNormalized(rule, "<foo>FOO<bar/></foo>", "\n\n <foo> \n FOO <bar> </bar> \n </foo>");
assertNormalized(rule, "<foo>FOOfoo<bar/></foo>", " <foo> FOOfoo <bar/></foo> ");
// NOTE(review): the next assertion repeats the one two lines above verbatim.
assertNormalized(rule, "<foo>FOO<bar/></foo>", "\n\n <foo> \n FOO <bar> </bar> \n </foo>");
assertNormalized(rule, "<foo>FOO<bar/></foo>", " <foo> FOO <bar/></foo> ");
assertNormalized(rule, "<foo>FOO<bar/></foo>", "<?xml version=\"1.0\" encoding=\"UTF-8\"?> <foo> FOO <bar/></foo> ");
assertNormalized(rule, "<foo>FOO<bar/></foo>", "<?xml version=\"1.0\" encoding=\"UTF-8\"?> <!-- bubu --> "
+ "<foo> FOO <!-- hehe --> <bar/> <!-- hah! --> </foo> ");

// Invalid XML
// The two literals differ only by the trailing space of the second one;
// the whitespace inside the unparseable value is the same in both.
assertNormalized(rule, "<foo>FOO<bar> BAR </foo>", "<foo>FOO<bar> BAR </foo> ");
}


/**
 * Fails the current test unless the given rule reports the two values as matching.
 *
 * @param rule matching rule under test
 * @param a first value
 * @param b second value
 */
private <T> void assertMatch(MatchingRule<T> rule, T a, T b) throws SchemaException {
    // The failure message is built before the rule is invoked, same as when it was
    // written inline as the first argument of the assertion.
    String failureMessage = "Values '" + a + "' and '" + b + "' does not match; rule: " + rule;
    boolean matched = rule.match(a, b);
    assertTrue(failureMessage, matched);
}
Expand Down
73 changes: 61 additions & 12 deletions infra/util/src/main/java/com/evolveum/midpoint/util/DOMUtil.java
Expand Up @@ -1045,6 +1045,10 @@ public static Element createSubElement(Element parent, QName subElementQName) {
}

/**
 * Convenience overload of the element comparison: delegates to the four-argument
 * {@code compareElement} with {@code considerWhitespaces} set to {@code true}.
 *
 * @param a first element
 * @param b second element
 * @param considerNamespacePrefixes passed through unchanged to the four-argument overload
 * @return result of the four-argument overload
 */
public static boolean compareElement(Element a, Element b, boolean considerNamespacePrefixes) {
return compareElement(a, b, considerNamespacePrefixes, true);
}

public static boolean compareElement(Element a, Element b, boolean considerNamespacePrefixes, boolean considerWhitespaces) {
if (a==b) {
return true;
}
Expand All @@ -1060,20 +1064,40 @@ public static boolean compareElement(Element a, Element b, boolean considerNames
if (!compareAttributes(a.getAttributes(),b.getAttributes(), considerNamespacePrefixes)) {
return false;
}
if (!compareNodeList(a.getChildNodes(),b.getChildNodes(), considerNamespacePrefixes)) {
if (!compareNodeList(a.getChildNodes(),b.getChildNodes(), considerNamespacePrefixes, considerWhitespaces)) {
return false;
}
return true;
}

/**
 * Compares two DOM documents by comparing the lists of their direct child nodes.
 *
 * @param a first document, may be null
 * @param b second document, may be null
 * @param considerNamespacePrefixes passed through to the node list comparison
 * @param considerWhitespaces passed through to the node list comparison
 * @return true if both arguments are the same reference (two nulls included) or if
 *         the child node lists compare as equal; false if exactly one argument is null
 */
public static boolean compareDocument(Document a, Document b, boolean considerNamespacePrefixes, boolean considerWhitespaces) {
    // Reference equality also covers the case when both documents are null.
    if (a == b) {
        return true;
    }
    // Exactly one of the documents is missing.
    if (a == null || b == null) {
        return false;
    }
    return compareNodeList(a.getChildNodes(), b.getChildNodes(), considerNamespacePrefixes, considerWhitespaces);
}

/**
 * Convenience overload of the element list comparison: delegates to the four-argument
 * {@code compareElementList} with {@code considerWhitespaces} set to {@code true}.
 *
 * @param aList first list of elements
 * @param bList second list of elements
 * @param considerNamespacePrefixes passed through unchanged to the four-argument overload
 * @return result of the four-argument overload
 */
public static boolean compareElementList(List<Element> aList, List<Element> bList, boolean considerNamespacePrefixes) {
return compareElementList(aList, bList, considerNamespacePrefixes, true);
}

public static boolean compareElementList(List<Element> aList, List<Element> bList, boolean considerNamespacePrefixes, boolean considerWhitespaces) {
if (aList.size() != bList.size()) {
return false;
}
Iterator<Element> bIterator = bList.iterator();
for (Element a: aList) {
Element b = bIterator.next();
if (!compareElement(a, b, considerNamespacePrefixes)) {
if (!compareElement(a, b, considerNamespacePrefixes, considerWhitespaces)) {
return false;
}
}
Expand Down Expand Up @@ -1133,7 +1157,7 @@ private static Attr findAttributeByQName(NamedNodeMap attrs, QName qname) {
return null;
}

private static boolean compareNodeList(NodeList a, NodeList b, boolean considerNamespacePrefixes) {
private static boolean compareNodeList(NodeList a, NodeList b, boolean considerNamespacePrefixes, boolean considerWhitespaces) {
if (a==b) {
return true;
}
Expand All @@ -1160,11 +1184,11 @@ private static boolean compareNodeList(NodeList a, NodeList b, boolean considerN
return false;
}
if (aItem.getNodeType() == Node.ELEMENT_NODE) {
if (!compareElement((Element)aItem, (Element)bItem, considerNamespacePrefixes)) {
if (!compareElement((Element)aItem, (Element)bItem, considerNamespacePrefixes, considerWhitespaces)) {
return false;
}
} else if (aItem.getNodeType() == Node.TEXT_NODE) {
if (!compareTextNodeValues(aItem.getTextContent(),bItem.getTextContent())) {
if (!compareTextNodeValues(aItem.getTextContent(), bItem.getTextContent(), considerWhitespaces)) {
return false;
}
}
Expand All @@ -1173,32 +1197,57 @@ private static boolean compareNodeList(NodeList a, NodeList b, boolean considerN
}

/**
 * Convenience overload of the text value comparison: delegates to the three-argument
 * {@code compareTextNodeValues} with {@code considerWhitespaces} set to {@code true}.
 *
 * @param a first text value
 * @param b second text value
 * @return result of the three-argument overload
 */
public static boolean compareTextNodeValues(String a, String b) {
return compareTextNodeValues(a, b, true);
}

/**
 * Decides whether two text node values are to be treated as equal.
 * <p>
 * The values are equal when {@code StringUtils.equals} says so, or when both of
 * them are blank according to {@code StringUtils.isBlank}. When
 * {@code considerWhitespaces} is false, values that are equal after
 * {@code StringUtils.trimToEmpty} are treated as equal as well.
 *
 * @param a first text value
 * @param b second text value
 * @param considerWhitespaces if false, leading and trailing whitespace is ignored
 * @return true if the values are considered equal
 */
public static boolean compareTextNodeValues(String a, String b, boolean considerWhitespaces) {
    if (StringUtils.equals(a, b)) {
        return true;
    }
    if (!considerWhitespaces) {
        String trimmedA = StringUtils.trimToEmpty(a);
        String trimmedB = StringUtils.trimToEmpty(b);
        if (trimmedA.equals(trimmedB)) {
            return true;
        }
    }
    // Two blank values are equal regardless of the whitespace flag.
    return StringUtils.isBlank(a) && StringUtils.isBlank(b);
}

/**
 * Builds a filtered copy of a node list that keeps only the nodes relevant
 * for comparison: element and attribute nodes are always kept, text and CDATA
 * nodes are kept unless their content is whitespace-only. Every other node type
 * (comments included) is left out. The original node list is not modified.
 */
private static List<Node> canonizeNodeList(NodeList nodelist) {
    List<Node> kept = new ArrayList<Node>(nodelist.getLength());
    for (int idx = 0; idx < nodelist.getLength(); idx++) {
        Node candidate = nodelist.item(idx);
        switch (candidate.getNodeType()) {
            case Node.ELEMENT_NODE:
            case Node.ATTRIBUTE_NODE:
                kept.add(candidate);
                break;
            case Node.TEXT_NODE:
            case Node.CDATA_SECTION_NODE:
                // Whitespace-only content is formatting, not data.
                boolean whitespaceOnly = candidate.getTextContent().matches("\\s*");
                if (!whitespaceOnly) {
                    kept.add(candidate);
                }
                break;
            default:
                // Comments and any other node types are dropped.
                break;
        }
    }
    return kept;
}

public static void normalize(Node node, boolean keepWhitespaces) {
NodeList childNodes = node.getChildNodes();
for (int i = 0; i < childNodes.getLength(); i++) {
Node aItem = childNodes.item(i);
if (aItem.getNodeType() == Node.COMMENT_NODE) {
continue;
node.removeChild(aItem);
i--;
} else if (aItem.getNodeType() == Node.TEXT_NODE) {
if (aItem.getTextContent().matches("\\s*")) {
continue;
node.removeChild(aItem);
i--;
} else {
if (!keepWhitespaces) {
aItem.setTextContent(aItem.getTextContent().trim());
}
}
} else if (aItem.getNodeType() == Node.ELEMENT_NODE) {
normalize(aItem, keepWhitespaces);
}
list.add(aItem);
}
return list;
}

public static boolean isJunk(Node node) {
Expand Down

0 comments on commit a7295f4

Please sign in to comment.