Pre-compile regular expressions

Pattern is thread-safe, and pre-compiling it once performs better than String.matches()/replace()/replaceAll(), which compile the regular expression again on every call.

Signed-off-by: Robert Varga <nite@hq.sk>
Evolveum · Sep 2, 2017 · 3356e16 · 3356e16
1 parent 23d1596
commit 3356e16
Show file tree

Hide file tree

Showing 5 changed files with 40 additions and 18 deletions.
diff --git a/infra/prism/src/main/java/com/evolveum/midpoint/prism/marshaller/TrivialItemPathParser.java b/infra/prism/src/main/java/com/evolveum/midpoint/prism/marshaller/TrivialItemPathParser.java
@@ -30,6 +30,17 @@
  * @author semancik
  */
 public class TrivialItemPathParser {
+    // This is using regexp to "parse" the declarations. It is not ideal,
+    // it does not check the syntax, does not show reasonable errors, etc.
+    // But it was all done in like 20minutes. Good value/price ;-)
+    private static final String PARSE_REGEX = "(^|;)[\\s\\p{Z}]*declare[\\s\\p{Z}]+(default[\\s\\p{Z}]+)?namespace[\\s\\p{Z}]+((\\w+)[\\s\\p{Z}]*=[\\s\\p{Z}]*)?(['\"])([^'\"]*)\\5[\\s\\p{Z}]*(?=;)";
+    private static final Pattern PARSE_PATTERN = Pattern.compile(PARSE_REGEX);
+
+    private static final String NBWS_HEAD_REGEX = "^[\\p{Z}\\s]+";
+    private static final Pattern NBSP_HEAD_PATTERN = Pattern.compile(NBWS_HEAD_REGEX);
+
+    private static final String NBWS_TAIL_REGEX = "[\\p{Z}\\s]+$";
+    private static final Pattern NBWS_TAIL_PATTERN = Pattern.compile(NBWS_TAIL_REGEX);
 
     private final Map<String,String> namespaceMap = new HashMap<>();
     private String pureItemPathString;
@@ -42,13 +53,8 @@ public static TrivialItemPathParser parse(String itemPath) {
 
         TrivialItemPathParser parser = new TrivialItemPathParser();
 
-        // This is using regexp to "parse" the declarations. It is not ideal,
-        // it does not check the syntax, does not show reasonable errors, etc.
-        // But it was all done in like 20minutes. Good value/price ;-)
 
-        String regexp = "(^|;)[\\s\\p{Z}]*declare[\\s\\p{Z}]+(default[\\s\\p{Z}]+)?namespace[\\s\\p{Z}]+((\\w+)[\\s\\p{Z}]*=[\\s\\p{Z}]*)?(['\"])([^'\"]*)\\5[\\s\\p{Z}]*(?=;)";
-        Pattern pattern = Pattern.compile(regexp);
-        Matcher matcher = pattern.matcher(itemPath);
+        Matcher matcher = PARSE_PATTERN.matcher(itemPath);
 
         int lastEnd = 0;
         while (matcher.find()) {
@@ -71,8 +77,7 @@ public static TrivialItemPathParser parse(String itemPath) {
 
         // Trim whitechars
         // trim() won't do here. it is not trimming non-breakable spaces.
-
-        parser.pureItemPathString = parser.pureItemPathString.replaceFirst("^[\\p{Z}\\s]+", "").replaceFirst("[\\p{Z}\\s]+$", "");
+        parser.pureItemPathString = NBWS_TAIL_PATTERN.matcher(NBSP_HEAD_PATTERN.matcher(parser.pureItemPathString).replaceFirst("")).replaceFirst("");
 
         return parser;
     }

diff --git a/...rc/main/java/com/evolveum/midpoint/prism/polystring/PrismDefaultPolyStringNormalizer.java b/...rc/main/java/com/evolveum/midpoint/prism/polystring/PrismDefaultPolyStringNormalizer.java
@@ -16,14 +16,18 @@
 package com.evolveum.midpoint.prism.polystring;
 
 import java.text.Normalizer;
-
+import java.util.regex.Pattern;
 import org.apache.commons.lang.StringUtils;
 
 /**
  * @author semancik
  *
  */
 public class PrismDefaultPolyStringNormalizer implements PolyStringNormalizer {
+	private static final String MALFORMED_REGEX = "[^\\w\\s\\d]";
+	private static final Pattern MALFORMED_PATTERN = Pattern.compile(MALFORMED_REGEX);
+	private static final String WHITESPACE_REGEX = "\\s+";
+	private static final Pattern WHITESPACE_PATTERN = Pattern.compile(WHITESPACE_REGEX);
 
 	/* (non-Javadoc)
 	 * @see com.evolveum.midpoint.prism.polystring.PolyStringNormalizer#normalize(java.lang.String)
@@ -35,8 +39,8 @@ public String normalize(String orig) {
 		}
 		String s = StringUtils.trim(orig);
 		s = Normalizer.normalize(s, Normalizer.Form.NFKD);
-		s = s.replaceAll("[^\\w\\s\\d]", "");
-		s = s.replaceAll("\\s+", " ");
+		s = MALFORMED_PATTERN.matcher(s).replaceAll("");
+		s = WHITESPACE_PATTERN.matcher(s).replaceAll(" ");
 		if (StringUtils.isBlank(s)) {
 			s = "";
 		}

diff --git a/infra/prism/src/main/java/com/evolveum/midpoint/prism/util/PrismPrettyPrinter.java b/infra/prism/src/main/java/com/evolveum/midpoint/prism/util/PrismPrettyPrinter.java
@@ -24,6 +24,7 @@
 import com.evolveum.prism.xml.ns._public.types_3.RawType;
 
 import javax.xml.namespace.QName;
+import java.util.regex.Pattern;
 import java.util.stream.Collectors;
 
 /**
@@ -33,6 +34,8 @@
 public class PrismPrettyPrinter {
 
 	private static final Trace LOGGER = TraceManager.getTrace(PrismPrettyPrinter.class);
+	private static final String CRLF_REGEX = "(\\r|\\n|\\r\\n)+";
+	private static final Pattern CRLF_PATTERN = Pattern.compile(CRLF_REGEX);
 
 	public static String prettyPrint(RawType raw) {
 		if (raw.getAlreadyParsedValue() != null) {
@@ -41,7 +44,7 @@ public static String prettyPrint(RawType raw) {
 		if (raw.getXnode() != null && raw.getPrismContext() != null) {
 			try {
 				String jsonText = raw.getPrismContext().jsonSerializer().serialize(raw.getRootXNode(new QName("value")));
-				return jsonText.replaceAll("(\\r|\\n|\\r\\n)+", "");
+				return CRLF_PATTERN.matcher(jsonText).replaceAll("");
 			} catch (Throwable t) {
 				LoggingUtils.logException(LOGGER, "Couldn't serialize raw value for pretty printing, using 'toString' instead: {}", t, raw.getXnode());
 			}

diff --git a/infra/util/src/main/java/com/evolveum/midpoint/util/DOMUtil.java b/infra/util/src/main/java/com/evolveum/midpoint/util/DOMUtil.java
@@ -25,7 +25,7 @@
 import java.io.StringWriter;
 import java.util.*;
 import java.util.Map.Entry;
-
+import java.util.regex.Pattern;
 import javax.xml.XMLConstants;
 import javax.xml.namespace.QName;
 import javax.xml.parsers.DocumentBuilder;
@@ -1206,21 +1206,24 @@ public static boolean compareTextNodeValues(String a, String b, boolean consider
 		return false;
 	}
 
+	private static final String SPACE_REGEX = "\\s*";
+	private static final Pattern SPACE_PATTERN = Pattern.compile(SPACE_REGEX);
+
 	private static List<Node> canonizeNodeList(NodeList nodelist) {
 		List<Node> list = new ArrayList<Node>(nodelist.getLength());
 		for (int i = 0; i < nodelist.getLength(); i++) {
 			Node aItem = nodelist.item(i);
 			if (aItem.getNodeType() == Node.ELEMENT_NODE || aItem.getNodeType() == Node.ATTRIBUTE_NODE) {
 				list.add(aItem);
 			} else if (aItem.getNodeType() == Node.TEXT_NODE || aItem.getNodeType() == Node.CDATA_SECTION_NODE) {
-				if (!aItem.getTextContent().matches("\\s*")) {
+				if (!SPACE_PATTERN.matcher(aItem.getTextContent()).matches()) {
 					list.add(aItem);
 				}
 			}
 		}
 		return list;
 	}
-	
+
 	public static void normalize(Node node, boolean keepWhitespaces) {
 		NodeList childNodes = node.getChildNodes();
 		for (int i = 0; i < childNodes.getLength(); i++) {
@@ -1229,7 +1232,7 @@ public static void normalize(Node node, boolean keepWhitespaces) {
 				node.removeChild(aItem);
 				i--;
 			} else if (aItem.getNodeType() == Node.TEXT_NODE) {
-				if (aItem.getTextContent().matches("\\s*")) {
+				if (SPACE_PATTERN.matcher(aItem.getTextContent()).matches()) {
 					node.removeChild(aItem);
 					i--;
 				} else {
@@ -1243,6 +1246,9 @@ public static void normalize(Node node, boolean keepWhitespaces) {
 		}
 	}
 
+	private static final String WS_ONLY_REGEX = "^\\s*$";
+	private static final Pattern WS_ONLY_PATTERN = Pattern.compile(WS_ONLY_REGEX);
+
 	public static boolean isJunk(Node node) {
 		if (node.getNodeType() == Node.COMMENT_NODE) {
 			return true;
@@ -1252,7 +1258,7 @@ public static boolean isJunk(Node node) {
 		}
 		if (node.getNodeType() == Node.TEXT_NODE) {
 			Text text = (Text)node;
-			if (text.getTextContent().matches("^\\s*$")) {
+			if (WS_ONLY_PATTERN.matcher(text.getTextContent()).matches()) {
 				return true;
 			}
 			return false;

diff --git a/infra/util/src/main/java/com/evolveum/midpoint/util/QNameUtil.java b/infra/util/src/main/java/com/evolveum/midpoint/util/QNameUtil.java
@@ -17,6 +17,7 @@
 package com.evolveum.midpoint.util;
 
 import java.util.*;
+import java.util.regex.Pattern;
 import java.util.stream.Collectors;
 
 import javax.xml.namespace.QName;
@@ -333,11 +334,14 @@ public static boolean isPrefixUndeclared(String namespacePrefix) {
         return namespacePrefix != null && namespacePrefix.startsWith(UNDECLARED_PREFIX_MARK);
     }
 
+	private static final String WORDS_COLON_REGEX = "^\\w+:.*";
+	private static final Pattern WORDS_COLON_PATTERN = Pattern.compile(WORDS_COLON_REGEX);
+
 	public static boolean isUri(String string) {
 		if (string == null) {
 			return false;
 		}
-		return string.matches("^\\w+:.*");
+		return WORDS_COLON_PATTERN.matcher(string).matches();
 	}
 
 	public static String getLocalPart(QName name) {