diff --git a/infra/prism/src/main/java/com/evolveum/midpoint/prism/marshaller/TrivialItemPathParser.java b/infra/prism/src/main/java/com/evolveum/midpoint/prism/marshaller/TrivialItemPathParser.java index 9d9bcbd833e..38a79353e5e 100644 --- a/infra/prism/src/main/java/com/evolveum/midpoint/prism/marshaller/TrivialItemPathParser.java +++ b/infra/prism/src/main/java/com/evolveum/midpoint/prism/marshaller/TrivialItemPathParser.java @@ -30,6 +30,17 @@ * @author semancik */ public class TrivialItemPathParser { + // This is using regexp to "parse" the declarations. It is not ideal, + // it does not check the syntax, does not show reasonable errors, etc. + // But it was all done in like 20minutes. Good value/price ;-) + private static final String PARSE_REGEX = "(^|;)[\\s\\p{Z}]*declare[\\s\\p{Z}]+(default[\\s\\p{Z}]+)?namespace[\\s\\p{Z}]+((\\w+)[\\s\\p{Z}]*=[\\s\\p{Z}]*)?(['\"])([^'\"]*)\\5[\\s\\p{Z}]*(?=;)"; + private static final Pattern PARSE_PATTERN = Pattern.compile(PARSE_REGEX); + + private static final String NBWS_HEAD_REGEX = "^[\\p{Z}\\s]+"; + private static final Pattern NBSP_HEAD_PATTERN = Pattern.compile(NBWS_HEAD_REGEX); + + private static final String NBWS_TAIL_REGEX = "[\\p{Z}\\s]+$"; + private static final Pattern NBWS_TAIL_PATTERN = Pattern.compile(NBWS_TAIL_REGEX); private final Map namespaceMap = new HashMap<>(); private String pureItemPathString; @@ -42,13 +53,8 @@ public static TrivialItemPathParser parse(String itemPath) { TrivialItemPathParser parser = new TrivialItemPathParser(); - // This is using regexp to "parse" the declarations. It is not ideal, - // it does not check the syntax, does not show reasonable errors, etc. - // But it was all done in like 20minutes. Good value/price ;-) - String regexp = "(^|;)[\\s\\p{Z}]*declare[\\s\\p{Z}]+(default[\\s\\p{Z}]+)?namespace[\\s\\p{Z}]+((\\w+)[\\s\\p{Z}]*=[\\s\\p{Z}]*)?(['\"])([^'\"]*)\\5[\\s\\p{Z}]*(?=;)"; - Pattern pattern = Pattern.compile(regexp); - Matcher matcher = pattern.matcher(itemPath); + Matcher matcher = PARSE_PATTERN.matcher(itemPath); int lastEnd = 0; while (matcher.find()) { @@ -71,8 +77,7 @@ public static TrivialItemPathParser parse(String itemPath) { // Trim whitechars // trim() won't do here. it is not trimming non-breakable spaces. - - parser.pureItemPathString = parser.pureItemPathString.replaceFirst("^[\\p{Z}\\s]+", "").replaceFirst("[\\p{Z}\\s]+$", ""); + parser.pureItemPathString = NBWS_TAIL_PATTERN.matcher(NBSP_HEAD_PATTERN.matcher(parser.pureItemPathString).replaceFirst("")).replaceFirst(""); return parser; } diff --git a/infra/prism/src/main/java/com/evolveum/midpoint/prism/polystring/PrismDefaultPolyStringNormalizer.java b/infra/prism/src/main/java/com/evolveum/midpoint/prism/polystring/PrismDefaultPolyStringNormalizer.java index 629de8bcd2f..4813b8e30de 100644 --- a/infra/prism/src/main/java/com/evolveum/midpoint/prism/polystring/PrismDefaultPolyStringNormalizer.java +++ b/infra/prism/src/main/java/com/evolveum/midpoint/prism/polystring/PrismDefaultPolyStringNormalizer.java @@ -16,7 +16,7 @@ package com.evolveum.midpoint.prism.polystring; import java.text.Normalizer; - +import java.util.regex.Pattern; import org.apache.commons.lang.StringUtils; /** @@ -24,6 +24,10 @@ * */ public class PrismDefaultPolyStringNormalizer implements PolyStringNormalizer { + private static final String MALFORMED_REGEX = "[^\\w\\s\\d]"; + private static final Pattern MALFORMED_PATTERN = Pattern.compile(MALFORMED_REGEX); + private static final String WHITESPACE_REGEX = "\\s+"; + private static final Pattern WHITESPACE_PATTERN = Pattern.compile(WHITESPACE_REGEX); /* (non-Javadoc) * @see com.evolveum.midpoint.prism.polystring.PolyStringNormalizer#normalize(java.lang.String) @@ -35,8 +39,8 @@ public String normalize(String orig) { } String s = StringUtils.trim(orig); s = Normalizer.normalize(s, Normalizer.Form.NFKD); - s = s.replaceAll("[^\\w\\s\\d]", ""); - s = s.replaceAll("\\s+", " "); + s = MALFORMED_PATTERN.matcher(s).replaceAll(""); + s = WHITESPACE_PATTERN.matcher(s).replaceAll(" "); if (StringUtils.isBlank(s)) { s = ""; } diff --git a/infra/prism/src/main/java/com/evolveum/midpoint/prism/util/PrismPrettyPrinter.java b/infra/prism/src/main/java/com/evolveum/midpoint/prism/util/PrismPrettyPrinter.java index 01a62a42952..baafd75abd8 100644 --- a/infra/prism/src/main/java/com/evolveum/midpoint/prism/util/PrismPrettyPrinter.java +++ b/infra/prism/src/main/java/com/evolveum/midpoint/prism/util/PrismPrettyPrinter.java @@ -24,6 +24,7 @@ import com.evolveum.prism.xml.ns._public.types_3.RawType; import javax.xml.namespace.QName; +import java.util.regex.Pattern; import java.util.stream.Collectors; /** @@ -33,6 +34,8 @@ public class PrismPrettyPrinter { private static final Trace LOGGER = TraceManager.getTrace(PrismPrettyPrinter.class); + private static final String CRLF_REGEX = "(\\r|\\n|\\r\\n)+"; + private static final Pattern CRLF_PATTERN = Pattern.compile(CRLF_REGEX); public static String prettyPrint(RawType raw) { if (raw.getAlreadyParsedValue() != null) { @@ -41,7 +44,7 @@ public static String prettyPrint(RawType raw) { if (raw.getXnode() != null && raw.getPrismContext() != null) { try { String jsonText = raw.getPrismContext().jsonSerializer().serialize(raw.getRootXNode(new QName("value"))); - return jsonText.replaceAll("(\\r|\\n|\\r\\n)+", ""); + return CRLF_PATTERN.matcher(jsonText).replaceAll(""); } catch (Throwable t) { LoggingUtils.logException(LOGGER, "Couldn't serialize raw value for pretty printing, using 'toString' instead: {}", t, raw.getXnode()); } diff --git a/infra/util/src/main/java/com/evolveum/midpoint/util/DOMUtil.java b/infra/util/src/main/java/com/evolveum/midpoint/util/DOMUtil.java index c23ad1bdff5..83d2481a235 100644 --- a/infra/util/src/main/java/com/evolveum/midpoint/util/DOMUtil.java +++ b/infra/util/src/main/java/com/evolveum/midpoint/util/DOMUtil.java @@ -25,7 +25,7 @@ import java.io.StringWriter; import java.util.*; import java.util.Map.Entry; - +import java.util.regex.Pattern; import javax.xml.XMLConstants; import javax.xml.namespace.QName; import javax.xml.parsers.DocumentBuilder; @@ -1206,6 +1206,9 @@ public static boolean compareTextNodeValues(String a, String b, boolean consider return false; } + private static final String SPACE_REGEX = "\\s*"; + private static final Pattern SPACE_PATTERN = Pattern.compile(SPACE_REGEX); + private static List canonizeNodeList(NodeList nodelist) { List list = new ArrayList(nodelist.getLength()); for (int i = 0; i < nodelist.getLength(); i++) { @@ -1213,14 +1216,14 @@ private static List canonizeNodeList(NodeList nodelist) { if (aItem.getNodeType() == Node.ELEMENT_NODE || aItem.getNodeType() == Node.ATTRIBUTE_NODE) { list.add(aItem); } else if (aItem.getNodeType() == Node.TEXT_NODE || aItem.getNodeType() == Node.CDATA_SECTION_NODE) { - if (!aItem.getTextContent().matches("\\s*")) { + if (!SPACE_PATTERN.matcher(aItem.getTextContent()).matches()) { list.add(aItem); } } } return list; } - + public static void normalize(Node node, boolean keepWhitespaces) { NodeList childNodes = node.getChildNodes(); for (int i = 0; i < childNodes.getLength(); i++) { @@ -1229,7 +1232,7 @@ public static void normalize(Node node, boolean keepWhitespaces) { node.removeChild(aItem); i--; } else if (aItem.getNodeType() == Node.TEXT_NODE) { - if (aItem.getTextContent().matches("\\s*")) { + if (SPACE_PATTERN.matcher(aItem.getTextContent()).matches()) { node.removeChild(aItem); i--; } else { @@ -1243,6 +1246,9 @@ public static void normalize(Node node, boolean keepWhitespaces) { } } + private static final String WS_ONLY_REGEX = "^\\s*$"; + private static final Pattern WS_ONLY_PATTERN = Pattern.compile(WS_ONLY_REGEX); + public static boolean isJunk(Node node) { if (node.getNodeType() == Node.COMMENT_NODE) { return true; @@ -1252,7 +1258,7 @@ public static boolean isJunk(Node node) { } if (node.getNodeType() == Node.TEXT_NODE) { Text text = (Text)node; - if (text.getTextContent().matches("^\\s*$")) { + if (WS_ONLY_PATTERN.matcher(text.getTextContent()).matches()) { return true; } return false; diff --git a/infra/util/src/main/java/com/evolveum/midpoint/util/QNameUtil.java b/infra/util/src/main/java/com/evolveum/midpoint/util/QNameUtil.java index 7fed1b30954..3c17f5f0693 100644 --- a/infra/util/src/main/java/com/evolveum/midpoint/util/QNameUtil.java +++ b/infra/util/src/main/java/com/evolveum/midpoint/util/QNameUtil.java @@ -17,6 +17,7 @@ package com.evolveum.midpoint.util; import java.util.*; +import java.util.regex.Pattern; import java.util.stream.Collectors; import javax.xml.namespace.QName; @@ -333,11 +334,14 @@ public static boolean isPrefixUndeclared(String namespacePrefix) { return namespacePrefix != null && namespacePrefix.startsWith(UNDECLARED_PREFIX_MARK); } + private static final String WORDS_COLON_REGEX = "^\\w+:.*"; + private static final Pattern WORDS_COLON_PATTERN = Pattern.compile(WORDS_COLON_REGEX); + public static boolean isUri(String string) { if (string == null) { return false; } - return string.matches("^\\w+:.*"); + return WORDS_COLON_PATTERN.matcher(string).matches(); } public static String getLocalPart(QName name) {