Skip to content

Commit

Permalink
MODE-2683 Fixes the behavior of Regex queries for the Lucene Index Pr…
Browse files Browse the repository at this point in the history
…ovider

Note that since Lucene 6 the old Jakarta-based RegexQuery is no longer present, so the previous code used Lucene's RegexpQuery, which does not support case-insensitive searching. This commit fixes the issue by using a custom query.
  • Loading branch information
Horia Chiorean committed Apr 6, 2017
1 parent 7432510 commit cb5fbb9
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 17 deletions.
Expand Up @@ -25,7 +25,6 @@
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.RegexpQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.Weight;
Expand Down Expand Up @@ -194,7 +193,7 @@ protected static Query createQueryForNodesWithFieldLike(String likeExpression,
return createQueryForNodesWithFieldEqualTo(likeExpression, fieldName, caseOperation);
}
if (caseOperation == null) {
// We can just do a normal Wildcard or RegEx query ...
// We can just do a normal Wildcard query ...

// '%' matches 0 or more characters
// '_' matches any single character
Expand All @@ -210,16 +209,10 @@ protected static Query createQueryForNodesWithFieldLike(String likeExpression,
return new WildcardQuery(new Term(fieldName, expression));
}
}
// Create a regex query (which will be done using the correct case) ...
// Create a regex query...
String regex = QueryUtil.toRegularExpression(likeExpression);

int flags = Pattern.UNICODE_CASE;
if (caseOperation != null) {
// if we're searching either for the UPPERCASE or LOWERCASE of something, use Case Insensitive matching
// even though it could produce false positive
flags = flags | Pattern.CASE_INSENSITIVE;
}
return new RegexpQuery(new Term(fieldName, regex), flags);
Pattern pattern = Pattern.compile(regex, Pattern.UNICODE_CASE);
return new RegexQuery(fieldName, pattern, caseOperation);
}

/**
Expand Down
Expand Up @@ -49,7 +49,6 @@
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.RegexpQuery;
import org.modeshape.common.annotation.Immutable;
import org.modeshape.common.annotation.ThreadSafe;
import org.modeshape.jcr.JcrI18n;
Expand Down Expand Up @@ -889,16 +888,15 @@ protected Query pathFieldQuery( String field, Operator operator, Object value, F
if (likeExpression.contains("[%]")) {
// We can't use '[%]' because we only want to match digits,
// so handle this using a regex ...
// !!! LUCENE Regexp is not the same as Java's. See the javadoc RegExp
String regex = likeExpression;
regex = regex.replace("[%]", "(\\[[0-9]+\\])?");
regex = regex.replaceAll("\\[\\d+\\]", "\\[[0-9]+\\]");
regex = regex.replaceAll("\\[(\\d+)\\]", "\\\\[$1\\\\]");
//regex = regex.replace("]", "\\]");
regex = regex.replace("*", ".*");
regex = regex.replace("%", ".*").replace("_", ".");
// Now create a regex query ...
int flags = caseOperation == null ? 0 : Pattern.CASE_INSENSITIVE;
return new RegexpQuery(new Term(field, regex), flags);
Pattern pattern = Pattern.compile(regex, Pattern.CASE_INSENSITIVE);
return new RegexQuery(field, pattern, caseOperation);
} else {
return CompareStringQuery.createQueryForNodesWithFieldLike(likeExpression, field, caseOperation);
}
Expand Down
@@ -0,0 +1,39 @@
/*
* ModeShape (http://www.modeshape.org)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.modeshape.jcr.index.lucene.query;

import java.util.function.Function;
import java.util.regex.Pattern;

/**
 * A Lucene query that tests values held in the index against a Java {@link Pattern}, so that the
 * {@code java.util.regex} syntax and flags can be used.
 *
 * @author Horia Chiorean (hchiorea@redhat.com)
 */
public final class RegexQuery extends CompareQuery<String> {

    /**
     * Creates a regular-expression query over a single field.
     *
     * @param fieldName the name of the field to query; handed to the superclass as-is
     * @param pattern the compiled expression; a value is accepted only when the pattern matches it
     *        in its entirety (see {@link java.util.regex.Matcher#matches()})
     * @param caseOperation handed to the superclass as-is; presumably a case conversion applied to
     *        stored values before they are compared, possibly null - TODO confirm against
     *        CompareQuery
     */
    public RegexQuery( String fieldName, Pattern pattern, Function<String, String> caseOperation ) {
        super(fieldName,
              pattern.pattern(),
              // The second argument is not consulted here: matching goes through the compiled
              // pattern directly rather than through its string form.
              (indexedValue, ignoredExpression) -> pattern.matcher(indexedValue).matches(),
              caseOperation);
    }

    /**
     * Returns the supplied value unchanged.
     */
    @Override
    protected String convertValue( String casedValue ) {
        return casedValue;
    }
}
Expand Up @@ -164,7 +164,6 @@ public void shouldNotReindexOnStartup() throws Exception {

@Test
@FixFor("MODE-2683")
@Ignore
public void shouldUseIndexWithUpperAndLowerOperands() throws Exception {
registerValueIndex("descriptionIndex", "mix:title", "Index for the 'jcr:title' property on mix:title", "*", "jcr:title",
PropertyType.STRING);
Expand Down

0 comments on commit cb5fbb9

Please sign in to comment.