Skip to content

Commit

Permalink
Remove lowercase_expanded_terms and locale from query-parser options. (#20208)
Browse files Browse the repository at this point in the history

Lucene 6.2 introduces the new `Analyzer.normalize` API, which allows to apply
only character-level normalization such as lowercasing or accent folding, which
is exactly what is needed to process queries that operate on partial terms such
as `prefix`, `wildcard` or `fuzzy` queries. As a consequence, the
`lowercase_expanded_terms` option is not necessary anymore. Furthermore, the
`locale` option was only needed in order to know how to perform the lowercasing,
so this one can be removed as well.

Closes #9978
  • Loading branch information
jpountz committed Nov 2, 2016
1 parent 638353c commit 52de064
Show file tree
Hide file tree
Showing 29 changed files with 193 additions and 470 deletions.
Expand Up @@ -24,6 +24,7 @@
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.analyzing.AnalyzingQueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostQuery;
Expand All @@ -34,6 +35,7 @@
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SynonymQuery;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.automaton.RegExp;
import org.elasticsearch.common.lucene.search.Queries;
Expand All @@ -42,6 +44,7 @@
import org.elasticsearch.index.mapper.LegacyDateFieldMapper;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.mapper.StringFieldType;
import org.elasticsearch.index.query.QueryShardContext;
import org.elasticsearch.index.query.support.QueryParsers;

Expand All @@ -63,7 +66,7 @@
* Also breaks fields with [type].[name] into a boolean query that must include the type
* as well as the query on the name.
*/
public class MapperQueryParser extends QueryParser {
public class MapperQueryParser extends AnalyzingQueryParser {

public static final Map<String, FieldQueryExtension> FIELD_QUERY_EXTENSIONS;

Expand Down Expand Up @@ -99,11 +102,10 @@ public void reset(QueryParserSettings settings) {
setAutoGeneratePhraseQueries(settings.autoGeneratePhraseQueries());
setMaxDeterminizedStates(settings.maxDeterminizedStates());
setAllowLeadingWildcard(settings.allowLeadingWildcard());
setLowercaseExpandedTerms(settings.lowercaseExpandedTerms());
setLowercaseExpandedTerms(false);
setPhraseSlop(settings.phraseSlop());
setDefaultOperator(settings.defaultOperator());
setFuzzyPrefixLength(settings.fuzzyPrefixLength());
setLocale(settings.locale());
setSplitOnWhitespace(settings.splitOnWhitespace());
}

Expand Down Expand Up @@ -330,21 +332,20 @@ private Query getRangeQuerySingle(String field, String part1, String part2,
boolean startInclusive, boolean endInclusive, QueryShardContext context) {
currentFieldType = context.fieldMapper(field);
if (currentFieldType != null) {
if (lowercaseExpandedTerms && currentFieldType.tokenized()) {
part1 = part1 == null ? null : part1.toLowerCase(locale);
part2 = part2 == null ? null : part2.toLowerCase(locale);
}

try {
BytesRef part1Binary = part1 == null ? null : getAnalyzer().normalize(field, part1);
BytesRef part2Binary = part2 == null ? null : getAnalyzer().normalize(field, part2);
Query rangeQuery;
if (currentFieldType instanceof LegacyDateFieldMapper.DateFieldType && settings.timeZone() != null) {
LegacyDateFieldMapper.DateFieldType dateFieldType = (LegacyDateFieldMapper.DateFieldType) this.currentFieldType;
rangeQuery = dateFieldType.rangeQuery(part1, part2, startInclusive, endInclusive, settings.timeZone(), null, context);
rangeQuery = dateFieldType.rangeQuery(part1Binary, part2Binary,
startInclusive, endInclusive, settings.timeZone(), null, context);
} else if (currentFieldType instanceof DateFieldMapper.DateFieldType && settings.timeZone() != null) {
DateFieldMapper.DateFieldType dateFieldType = (DateFieldMapper.DateFieldType) this.currentFieldType;
rangeQuery = dateFieldType.rangeQuery(part1, part2, startInclusive, endInclusive, settings.timeZone(), null, context);
rangeQuery = dateFieldType.rangeQuery(part1Binary, part2Binary,
startInclusive, endInclusive, settings.timeZone(), null, context);
} else {
rangeQuery = currentFieldType.rangeQuery(part1, part2, startInclusive, endInclusive, context);
rangeQuery = currentFieldType.rangeQuery(part1Binary, part2Binary, startInclusive, endInclusive, context);
}
return rangeQuery;
} catch (RuntimeException e) {
Expand All @@ -358,9 +359,6 @@ private Query getRangeQuerySingle(String field, String part1, String part2,
}

protected Query getFuzzyQuery(String field, String termStr, String minSimilarity) throws ParseException {
if (lowercaseExpandedTerms) {
termStr = termStr.toLowerCase(locale);
}
Collection<String> fields = extractMultiFields(field);
if (fields != null) {
if (fields.size() == 1) {
Expand Down Expand Up @@ -399,8 +397,9 @@ private Query getFuzzyQuerySingle(String field, String termStr, String minSimila
currentFieldType = context.fieldMapper(field);
if (currentFieldType != null) {
try {
return currentFieldType.fuzzyQuery(termStr, Fuzziness.build(minSimilarity),
fuzzyPrefixLength, settings.fuzzyMaxExpansions(), FuzzyQuery.defaultTranspositions);
BytesRef term = termStr == null ? null : getAnalyzer().normalize(field, termStr);
return currentFieldType.fuzzyQuery(term, Fuzziness.build(minSimilarity),
getFuzzyPrefixLength(), settings.fuzzyMaxExpansions(), FuzzyQuery.defaultTranspositions);
} catch (RuntimeException e) {
if (settings.lenient()) {
return null;
Expand All @@ -423,9 +422,6 @@ protected Query newFuzzyQuery(Term term, float minimumSimilarity, int prefixLeng

@Override
protected Query getPrefixQuery(String field, String termStr) throws ParseException {
if (lowercaseExpandedTerms) {
termStr = termStr.toLowerCase(locale);
}
Collection<String> fields = extractMultiFields(field);
if (fields != null) {
if (fields.size() == 1) {
Expand Down Expand Up @@ -471,8 +467,8 @@ private Query getPrefixQuerySingle(String field, String termStr) throws ParseExc
setAnalyzer(context.getSearchAnalyzer(currentFieldType));
}
Query query = null;
if (currentFieldType.tokenized() == false) {
query = currentFieldType.prefixQuery(termStr, multiTermRewriteMethod, context);
if (currentFieldType instanceof StringFieldType == false) {
query = currentFieldType.prefixQuery(termStr, getMultiTermRewriteMethod(), context);
}
if (query == null) {
query = getPossiblyAnalyzedPrefixQuery(currentFieldType.name(), termStr);
Expand Down Expand Up @@ -590,9 +586,6 @@ protected Query getWildcardQuery(String field, String termStr) throws ParseExcep
return FIELD_QUERY_EXTENSIONS.get(ExistsFieldQueryExtension.NAME).query(context, actualField);
}
}
if (lowercaseExpandedTerms) {
termStr = termStr.toLowerCase(locale);
}
Collection<String> fields = extractMultiFields(field);
if (fields != null) {
if (fields.size() == 1) {
Expand Down Expand Up @@ -639,9 +632,8 @@ private Query getWildcardQuerySingle(String field, String termStr) throws ParseE
setAnalyzer(context.getSearchAnalyzer(currentFieldType));
}
indexedNameField = currentFieldType.name();
return getPossiblyAnalyzedWildcardQuery(indexedNameField, termStr);
}
return getPossiblyAnalyzedWildcardQuery(indexedNameField, termStr);
return super.getWildcardQuery(indexedNameField, termStr);
} catch (RuntimeException e) {
if (settings.lenient()) {
return null;
Expand All @@ -652,75 +644,8 @@ private Query getWildcardQuerySingle(String field, String termStr) throws ParseE
}
}

/**
 * Builds a wildcard query for {@code termStr}, optionally running the non-wildcard
 * portions of the pattern through the field's analyzer.
 *
 * <p>When {@code settings.analyzeWildcard()} is disabled this simply delegates to
 * {@link QueryParser#getWildcardQuery}. Otherwise the pattern is split on the
 * wildcard characters {@code ?} and {@code *}: each plain-text segment is analyzed
 * and replaced by its first emitted token (falling back to the raw text when the
 * analyzer produces nothing or fails), while the wildcard characters themselves are
 * passed through verbatim.
 *
 * @param field   the field the query targets
 * @param termStr the raw wildcard pattern as typed by the user
 * @return the wildcard query over the (possibly analyzed) pattern
 * @throws ParseException if the underlying parser rejects the resulting pattern
 */
private Query getPossiblyAnalyzedWildcardQuery(String field, String termStr) throws ParseException {
    if (!settings.analyzeWildcard()) {
        return super.getWildcardQuery(field, termStr);
    }
    // A leading wildcard means we start outside of a token.
    boolean isWithinToken = (!termStr.startsWith("?") && !termStr.startsWith("*"));
    StringBuilder aggStr = new StringBuilder();
    StringBuilder tmp = new StringBuilder();
    for (int i = 0; i < termStr.length(); i++) {
        char c = termStr.charAt(i);
        if (c == '?' || c == '*') {
            if (isWithinToken) {
                // Flush the text segment accumulated so far through the analyzer.
                appendAnalyzedToken(field, tmp, aggStr);
                tmp.setLength(0);
            }
            isWithinToken = false;
            aggStr.append(c);
        } else {
            tmp.append(c);
            isWithinToken = true;
        }
    }
    if (isWithinToken) {
        // Flush the trailing segment (pattern did not end with a wildcard).
        appendAnalyzedToken(field, tmp, aggStr);
    }

    return super.getWildcardQuery(field, aggStr.toString());
}

/**
 * Analyzes {@code tmp} with the current analyzer and appends the first token it
 * produces to {@code aggStr}. If analysis yields no token, an empty token, or
 * throws an {@link IOException}, the raw text of {@code tmp} is appended instead
 * so the original pattern is never silently dropped.
 *
 * @param field  the field whose analyzer chain is used
 * @param tmp    the plain-text segment to analyze (not modified)
 * @param aggStr the builder the analyzed (or raw) text is appended to
 */
private void appendAnalyzedToken(String field, StringBuilder tmp, StringBuilder aggStr) {
    try (TokenStream source = getAnalyzer().tokenStream(field, tmp.toString())) {
        source.reset();
        CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
        if (source.incrementToken()) {
            String term = termAtt.toString();
            if (term.length() == 0) {
                // no tokens, just use what we have now
                aggStr.append(tmp);
            } else {
                aggStr.append(term);
            }
        } else {
            // no tokens, just use what we have now
            aggStr.append(tmp);
        }
    } catch (IOException e) {
        // best-effort: on analysis failure fall back to the unanalyzed text
        aggStr.append(tmp);
    }
}

@Override
protected Query getRegexpQuery(String field, String termStr) throws ParseException {
if (lowercaseExpandedTerms) {
termStr = termStr.toLowerCase(locale);
}
Collection<String> fields = extractMultiFields(field);
if (fields != null) {
if (fields.size() == 1) {
Expand Down Expand Up @@ -768,7 +693,7 @@ private Query getRegexpQuerySingle(String field, String termStr) throws ParseExc
Query query = null;
if (currentFieldType.tokenized() == false) {
query = currentFieldType.regexpQuery(termStr, RegExp.ALL,
maxDeterminizedStates, multiTermRewriteMethod, context);
getMaxDeterminizedStates(), getMultiTermRewriteMethod(), context);
}
if (query == null) {
query = super.getRegexpQuery(field, termStr);
Expand Down
Expand Up @@ -24,7 +24,6 @@
import org.elasticsearch.common.unit.Fuzziness;
import org.joda.time.DateTimeZone;

import java.util.Locale;
import java.util.Map;

/**
Expand Down Expand Up @@ -53,12 +52,8 @@ public class QueryParserSettings {

private boolean analyzeWildcard;

private boolean lowercaseExpandedTerms;

private boolean enablePositionIncrements;

private Locale locale;

private Fuzziness fuzziness;
private int fuzzyPrefixLength;
private int fuzzyMaxExpansions;
Expand Down Expand Up @@ -137,14 +132,6 @@ public void allowLeadingWildcard(boolean allowLeadingWildcard) {
this.allowLeadingWildcard = allowLeadingWildcard;
}

public boolean lowercaseExpandedTerms() {
return lowercaseExpandedTerms;
}

public void lowercaseExpandedTerms(boolean lowercaseExpandedTerms) {
this.lowercaseExpandedTerms = lowercaseExpandedTerms;
}

public boolean enablePositionIncrements() {
return enablePositionIncrements;
}
Expand Down Expand Up @@ -269,14 +256,6 @@ public void useDisMax(boolean useDisMax) {
this.useDisMax = useDisMax;
}

public void locale(Locale locale) {
this.locale = locale;
}

public Locale locale() {
return this.locale;
}

public void timeZone(DateTimeZone timeZone) {
this.timeZone = timeZone;
}
Expand Down
Expand Up @@ -54,7 +54,6 @@
import java.util.Map;
import java.util.Set;
import java.util.function.Function;
import java.util.function.LongSupplier;
import java.util.function.Supplier;
import java.util.stream.Collectors;

Expand Down

0 comments on commit 52de064

Please sign in to comment.