Skip to content

Commit

Permalink
Pre-compile regular expressions
Browse files Browse the repository at this point in the history
Pattern is thread-safe, so a compiled instance can be shared as a static
constant. Pre-compiling it performs better than String.matches()/
replaceFirst()/replaceAll(), which compile the expression again on every call.

Signed-off-by: Robert Varga <nite@hq.sk>
  • Loading branch information
rovarga committed Sep 2, 2017
1 parent 23d1596 commit 3356e16
Show file tree
Hide file tree
Showing 5 changed files with 40 additions and 18 deletions.
Expand Up @@ -30,6 +30,17 @@
* @author semancik
*/
public class TrivialItemPathParser {
// This is using regexp to "parse" the declarations. It is not ideal,
// it does not check the syntax, does not show reasonable errors, etc.
// But it was all done in like 20 minutes. Good value/price ;-)
//
// The expression matches a single declaration of the shape
//     declare [default] namespace [prefix =] 'text'
// that starts at the beginning of the input or right after a ';' and is
// followed by a ';' (checked with a lookahead, so that ';' is not consumed).
// "[\s\p{Z}]" is used wherever whitespace is allowed so that Unicode
// separator characters are accepted in addition to ASCII whitespace.
// Capturing groups:
//   1 = start of input or the preceding ';'
//   2 = the optional "default" keyword (with its trailing whitespace)
//   3 = the optional "prefix =" part, 4 = the prefix itself (\w+)
//   5 = the opening quote (' or "), which must also close the text (\5)
//   6 = the quoted text (may not contain either kind of quote)
private static final String PARSE_REGEX = "(^|;)[\\s\\p{Z}]*declare[\\s\\p{Z}]+(default[\\s\\p{Z}]+)?namespace[\\s\\p{Z}]+((\\w+)[\\s\\p{Z}]*=[\\s\\p{Z}]*)?(['\"])([^'\"]*)\\5[\\s\\p{Z}]*(?=;)";
// Compiled once; Pattern instances are immutable and safe to share.
private static final Pattern PARSE_PATTERN = Pattern.compile(PARSE_REGEX);

// Leading run of whitespace and/or Unicode separator characters.
// NOTE(review): this constant is named NBSP_* while its siblings use NBWS_*;
// renaming it requires updating every usage in the same change.
private static final String NBWS_HEAD_REGEX = "^[\\p{Z}\\s]+";
private static final Pattern NBSP_HEAD_PATTERN = Pattern.compile(NBWS_HEAD_REGEX);

// Trailing run of whitespace and/or Unicode separator characters.
private static final String NBWS_TAIL_REGEX = "[\\p{Z}\\s]+$";
private static final Pattern NBWS_TAIL_PATTERN = Pattern.compile(NBWS_TAIL_REGEX);

private final Map<String,String> namespaceMap = new HashMap<>();
private String pureItemPathString;
Expand All @@ -42,13 +53,8 @@ public static TrivialItemPathParser parse(String itemPath) {

TrivialItemPathParser parser = new TrivialItemPathParser();

// This is using regexp to "parse" the declarations. It is not ideal,
// it does not check the syntax, does not show reasonable errors, etc.
// But it was all done in like 20minutes. Good value/price ;-)

String regexp = "(^|;)[\\s\\p{Z}]*declare[\\s\\p{Z}]+(default[\\s\\p{Z}]+)?namespace[\\s\\p{Z}]+((\\w+)[\\s\\p{Z}]*=[\\s\\p{Z}]*)?(['\"])([^'\"]*)\\5[\\s\\p{Z}]*(?=;)";
Pattern pattern = Pattern.compile(regexp);
Matcher matcher = pattern.matcher(itemPath);
Matcher matcher = PARSE_PATTERN.matcher(itemPath);

int lastEnd = 0;
while (matcher.find()) {
Expand All @@ -71,8 +77,7 @@ public static TrivialItemPathParser parse(String itemPath) {

// Trim whitechars
// trim() won't do here. it is not trimming non-breakable spaces.

parser.pureItemPathString = parser.pureItemPathString.replaceFirst("^[\\p{Z}\\s]+", "").replaceFirst("[\\p{Z}\\s]+$", "");
parser.pureItemPathString = NBWS_TAIL_PATTERN.matcher(NBSP_HEAD_PATTERN.matcher(parser.pureItemPathString).replaceFirst("")).replaceFirst("");

return parser;
}
Expand Down
Expand Up @@ -16,14 +16,18 @@
package com.evolveum.midpoint.prism.polystring;

import java.text.Normalizer;

import java.util.regex.Pattern;
import org.apache.commons.lang.StringUtils;

/**
* @author semancik
*
*/
public class PrismDefaultPolyStringNormalizer implements PolyStringNormalizer {
// Any single character that is neither a word character nor whitespace.
// NOTE(review): the \d inside the class is redundant, because \w already
// covers the digits when the pattern is compiled without extra flags.
private static final String MALFORMED_REGEX = "[^\\w\\s\\d]";
// Compiled once; Pattern instances are immutable and safe to share.
private static final Pattern MALFORMED_PATTERN = Pattern.compile(MALFORMED_REGEX);
// One or more consecutive whitespace characters.
private static final String WHITESPACE_REGEX = "\\s+";
private static final Pattern WHITESPACE_PATTERN = Pattern.compile(WHITESPACE_REGEX);

/* (non-Javadoc)
* @see com.evolveum.midpoint.prism.polystring.PolyStringNormalizer#normalize(java.lang.String)
Expand All @@ -35,8 +39,8 @@ public String normalize(String orig) {
}
String s = StringUtils.trim(orig);
s = Normalizer.normalize(s, Normalizer.Form.NFKD);
s = s.replaceAll("[^\\w\\s\\d]", "");
s = s.replaceAll("\\s+", " ");
s = MALFORMED_PATTERN.matcher(s).replaceAll("");
s = WHITESPACE_PATTERN.matcher(s).replaceAll(" ");
if (StringUtils.isBlank(s)) {
s = "";
}
Expand Down
Expand Up @@ -24,6 +24,7 @@
import com.evolveum.prism.xml.ns._public.types_3.RawType;

import javax.xml.namespace.QName;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

/**
Expand All @@ -33,6 +34,8 @@
public class PrismPrettyPrinter {

private static final Trace LOGGER = TraceManager.getTrace(PrismPrettyPrinter.class);
// One or more consecutive line-break characters (CR and/or LF).
// NOTE(review): the third alternative (CR LF) is redundant, because the
// outer '+' already lets a CR be followed by an LF; "[\r\n]+" matches the same text.
private static final String CRLF_REGEX = "(\\r|\\n|\\r\\n)+";
// Compiled once; Pattern instances are immutable and safe to share.
private static final Pattern CRLF_PATTERN = Pattern.compile(CRLF_REGEX);

public static String prettyPrint(RawType raw) {
if (raw.getAlreadyParsedValue() != null) {
Expand All @@ -41,7 +44,7 @@ public static String prettyPrint(RawType raw) {
if (raw.getXnode() != null && raw.getPrismContext() != null) {
try {
String jsonText = raw.getPrismContext().jsonSerializer().serialize(raw.getRootXNode(new QName("value")));
return jsonText.replaceAll("(\\r|\\n|\\r\\n)+", "");
return CRLF_PATTERN.matcher(jsonText).replaceAll("");
} catch (Throwable t) {
LoggingUtils.logException(LOGGER, "Couldn't serialize raw value for pretty printing, using 'toString' instead: {}", t, raw.getXnode());
}
Expand Down
16 changes: 11 additions & 5 deletions infra/util/src/main/java/com/evolveum/midpoint/util/DOMUtil.java
Expand Up @@ -25,7 +25,7 @@
import java.io.StringWriter;
import java.util.*;
import java.util.Map.Entry;

import java.util.regex.Pattern;
import javax.xml.XMLConstants;
import javax.xml.namespace.QName;
import javax.xml.parsers.DocumentBuilder;
Expand Down Expand Up @@ -1206,21 +1206,24 @@ public static boolean compareTextNodeValues(String a, String b, boolean consider
return false;
}

private static final String SPACE_REGEX = "\\s*";
private static final Pattern SPACE_PATTERN = Pattern.compile(SPACE_REGEX);

private static List<Node> canonizeNodeList(NodeList nodelist) {
List<Node> list = new ArrayList<Node>(nodelist.getLength());
for (int i = 0; i < nodelist.getLength(); i++) {
Node aItem = nodelist.item(i);
if (aItem.getNodeType() == Node.ELEMENT_NODE || aItem.getNodeType() == Node.ATTRIBUTE_NODE) {
list.add(aItem);
} else if (aItem.getNodeType() == Node.TEXT_NODE || aItem.getNodeType() == Node.CDATA_SECTION_NODE) {
if (!aItem.getTextContent().matches("\\s*")) {
if (!SPACE_PATTERN.matcher(aItem.getTextContent()).matches()) {
list.add(aItem);
}
}
}
return list;
}

public static void normalize(Node node, boolean keepWhitespaces) {
NodeList childNodes = node.getChildNodes();
for (int i = 0; i < childNodes.getLength(); i++) {
Expand All @@ -1229,7 +1232,7 @@ public static void normalize(Node node, boolean keepWhitespaces) {
node.removeChild(aItem);
i--;
} else if (aItem.getNodeType() == Node.TEXT_NODE) {
if (aItem.getTextContent().matches("\\s*")) {
if (SPACE_PATTERN.matcher(aItem.getTextContent()).matches()) {
node.removeChild(aItem);
i--;
} else {
Expand All @@ -1243,6 +1246,9 @@ public static void normalize(Node node, boolean keepWhitespaces) {
}
}

// Text that consists solely of whitespace (the empty string included).
// NOTE(review): when used through Matcher.matches() the whole input has to
// match anyway, so the ^ and $ anchors add nothing and this appears to be
// equivalent to SPACE_PATTERN ("\\s*") declared earlier in this class.
private static final String WS_ONLY_REGEX = "^\\s*$";
// Compiled once; Pattern instances are immutable and safe to share.
private static final Pattern WS_ONLY_PATTERN = Pattern.compile(WS_ONLY_REGEX);

public static boolean isJunk(Node node) {
if (node.getNodeType() == Node.COMMENT_NODE) {
return true;
Expand All @@ -1252,7 +1258,7 @@ public static boolean isJunk(Node node) {
}
if (node.getNodeType() == Node.TEXT_NODE) {
Text text = (Text)node;
if (text.getTextContent().matches("^\\s*$")) {
if (WS_ONLY_PATTERN.matcher(text.getTextContent()).matches()) {
return true;
}
return false;
Expand Down
Expand Up @@ -17,6 +17,7 @@
package com.evolveum.midpoint.util;

import java.util.*;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import javax.xml.namespace.QName;
Expand Down Expand Up @@ -333,11 +334,14 @@ public static boolean isPrefixUndeclared(String namespacePrefix) {
return namespacePrefix != null && namespacePrefix.startsWith(UNDECLARED_PREFIX_MARK);
}

/**
 * One or more word characters, a colon, then any characters except line
 * terminators. The leading {@code ^} is redundant with {@code matches()} and is
 * kept only so the expression stays identical to the previous inline literal.
 */
private static final String WORDS_COLON_REGEX = "^\\w+:.*";
/** Compiled once; {@link Pattern} instances are immutable and safe to share. */
private static final Pattern WORDS_COLON_PATTERN = Pattern.compile(WORDS_COLON_REGEX);

/**
 * Tells whether a string looks like a URI, i.e. starts with a scheme-like
 * run of word characters followed by a colon.
 *
 * <p>This is a purely syntactic check of the prefix; the rest of the string is
 * not validated. A string that contains a line terminator never matches,
 * because {@code .} does not match line terminators.
 *
 * @param string the text to test, may be {@code null}
 * @return {@code true} if the whole string matches {@code \w+:.*};
 *         {@code false} otherwise, including for {@code null}
 */
public static boolean isUri(String string) {
    return string != null && WORDS_COLON_PATTERN.matcher(string).matches();
}

public static String getLocalPart(QName name) {
Expand Down

0 comments on commit 3356e16

Please sign in to comment.