Remove the lowercase_expanded_terms and locale options from `(simple_)query_string`.

This pull request uses the `MultiTermAwareComponent` interface in order to
figure out how to analyze queries that match partial strings. This provides a
better out-of-the-box experience and makes it possible to remove the
`lowercase_expanded_terms` and `locale` options (the latter was only used for
lowercasing).

Things are expected to work well for custom analyzers. However, built-in
analyzers make it challenging to know which components should be kept for
multi-term analysis. The way it is implemented today is that there is a default
implementation that returns a lowercasing analyzer, which should be fine for
most language analyzers for European languages. I did not want to go too far in
configuring the correct multi-term analyzer for those until we have a way to
test that we are in sync with what happens in Lucene, as we already do for
testing which factories need to implement `MultiTermAwareComponent`.
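
For reference, the default behaviour described above boils down to something
like the following minimal sketch, assuming the Lucene 6.x analysis-common API
(package locations differ across Lucene versions, and the actual wiring in
Elasticsearch is more involved). It mirrors what `analyzeSingleToken` in the
diff below does with the multi-term analyzer:

```java
import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class LowercaseOnlyMultiTermAnalyzer {

    // Keeps the whole input as a single token and lowercases it, which is the
    // only transformation the default multi-term analyzer applies.
    static final Analyzer MULTI_TERM = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer source = new KeywordTokenizer();
            TokenStream sink = new LowerCaseFilter(source);
            return new TokenStreamComponents(source, sink);
        }
    };

    // Normalizes the text of a wildcard/prefix/fuzzy/range term before the
    // corresponding multi-term query is built.
    static String normalize(String field, String term) throws IOException {
        try (TokenStream ts = MULTI_TERM.tokenStream(field, term)) {
            CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
            ts.reset();
            String normalized = ts.incrementToken() ? termAtt.toString() : term;
            ts.end();
            return normalized;
        }
    }

    public static void main(String[] args) throws IOException {
        System.out.println(normalize("title", "QuickFox")); // prints "quickfox"
    }
}
```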

In the future we could consider removing `analyze_wildcards` as well, but the
query parser currently has the ability to tokenize the wildcard term and
generate a term query for the first n-1 tokens and a wildcard query on the last
token. I suspect some users are relying on this behaviour, so I think it should
be explored in a separate change.
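
Roughly, with `analyze_wildcards` enabled, a query such as `quick brown fo*` on
a `title` field becomes `+title:quick +title:brown +title:fo*`. The sketch
below is a simplified approximation of that behaviour (not the parser's actual
code); it assumes the tokens have already been produced by the analyzer and
that the last token carries a trailing `*`:

```java
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;

class AnalyzedWildcardSketch {

    // Term queries for the first n-1 tokens, a prefix query for the last one.
    // Assumes tokens.length >= 1 and that the last token ends with "*".
    static Query analyzedWildcard(String field, String[] tokens) {
        BooleanQuery.Builder builder = new BooleanQuery.Builder();
        for (int i = 0; i < tokens.length - 1; i++) {
            builder.add(new TermQuery(new Term(field, tokens[i])), Occur.MUST);
        }
        String last = tokens[tokens.length - 1];
        // Strip the trailing "*" and expand the remainder as a prefix query.
        builder.add(new PrefixQuery(new Term(field, last.substring(0, last.length() - 1))),
                Occur.MUST);
        return builder.build();
    }
}
```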

Closes elastic#9978
jpountz committed Jun 24, 2016
1 parent 7ba5bce commit 8c95540
Showing 107 changed files with 1,134 additions and 684 deletions.
@@ -93,18 +93,41 @@ public void reset(QueryParserSettings settings) {
} else {
this.field = null;
}
setAnalyzer(settings.analyzer());
setMultiTermRewriteMethod(settings.rewriteMethod());
if (settings.analyzer() != null) {
setAnalyzer(settings.analyzer());
} else {
setAnalyzer(context.getMapperService().searchAnalyzer());
}
if (settings.rewriteMethod() != null) {
setMultiTermRewriteMethod(settings.rewriteMethod());
}
setEnablePositionIncrements(settings.enablePositionIncrements());
setAutoGeneratePhraseQueries(settings.autoGeneratePhraseQueries());
setMaxDeterminizedStates(settings.maxDeterminizedStates());
setAllowLeadingWildcard(settings.allowLeadingWildcard());
setLowercaseExpandedTerms(settings.lowercaseExpandedTerms());
setLowercaseExpandedTerms(false); // no need for it, we use the mappings to figure it out
setPhraseSlop(settings.phraseSlop());
setDefaultOperator(settings.defaultOperator());
setFuzzyMinSim(settings.fuzziness().asFloat());
setFuzzyPrefixLength(settings.fuzzyPrefixLength());
setLocale(settings.locale());
}

private void setMultiTermAnalyzer() {
if (settings.multiTermAnalyzer() != null) {
setAnalyzer(settings.multiTermAnalyzer());
} else {
setAnalyzer(context.getMapperService().searchMultiTermAnalyzer());
}
}

private void setQuoteAnalyzer() {
if (settings.quoteAnalyzer() != null) {
setAnalyzer(settings.quoteAnalyzer());
} else if (settings.analyzer() != null) {
setAnalyzer(settings.analyzer());
} else {
setAnalyzer(context.getMapperService().searchQuoteAnalyzer());
}
}

/**
@@ -197,31 +220,21 @@ private Query getFieldQuerySingle(String field, String queryText, boolean quoted
currentFieldType = null;
Analyzer oldAnalyzer = getAnalyzer();
try {
if (quoted) {
setAnalyzer(settings.quoteAnalyzer());
if (settings.quoteFieldSuffix() != null) {
currentFieldType = context.fieldMapper(field + settings.quoteFieldSuffix());
}
if (quoted && settings.quoteFieldSuffix() != null) {
currentFieldType = context.fieldMapper(field + settings.quoteFieldSuffix());
}
if (currentFieldType == null) {
currentFieldType = context.fieldMapper(field);
}
if (currentFieldType != null) {
if (quoted) {
if (!settings.forceQuoteAnalyzer()) {
setAnalyzer(context.getSearchQuoteAnalyzer(currentFieldType));
}
} else {
if (!settings.forceAnalyzer()) {
setAnalyzer(context.getSearchAnalyzer(currentFieldType));
}
setQuoteAnalyzer();
}
if (currentFieldType != null) {
Query query = null;
if (currentFieldType.tokenized() == false) {
// this might be a structured field like a numeric
try {
query = currentFieldType.termQuery(queryText, context);
return currentFieldType.termQuery(queryText, context);
} catch (RuntimeException e) {
if (settings.lenient()) {
return null;
@@ -230,10 +243,6 @@ private Query getFieldQuerySingle(String field, String queryText, boolean quoted
}
}
}
if (query == null) {
query = super.getFieldQuery(currentFieldType.name(), queryText, quoted);
}
return query;
}
}
return super.getFieldQuery(field, queryText, quoted);
@@ -328,39 +337,61 @@ protected Query getRangeQuery(String field, String part1, String part2,

private Query getRangeQuerySingle(String field, String part1, String part2,
boolean startInclusive, boolean endInclusive) {
currentFieldType = context.fieldMapper(field);
if (currentFieldType != null) {
if (lowercaseExpandedTerms && currentFieldType.tokenized()) {
part1 = part1 == null ? null : part1.toLowerCase(locale);
part2 = part2 == null ? null : part2.toLowerCase(locale);
}
Analyzer oldAnalyzer = getAnalyzer();
try {
setMultiTermAnalyzer();
currentFieldType = context.fieldMapper(field);
if (currentFieldType != null) {
try {
if (currentFieldType.tokenized()) {
part1 = part1 == null ? null : analyzeSingleToken(field, part1);
part2 = part2 == null ? null : analyzeSingleToken(field, part2);
}

try {
Query rangeQuery;
if (currentFieldType instanceof LegacyDateFieldMapper.DateFieldType && settings.timeZone() != null) {
LegacyDateFieldMapper.DateFieldType dateFieldType = (LegacyDateFieldMapper.DateFieldType) this.currentFieldType;
rangeQuery = dateFieldType.rangeQuery(part1, part2, startInclusive, endInclusive, settings.timeZone(), null);
} else if (currentFieldType instanceof DateFieldMapper.DateFieldType && settings.timeZone() != null) {
DateFieldMapper.DateFieldType dateFieldType = (DateFieldMapper.DateFieldType) this.currentFieldType;
rangeQuery = dateFieldType.rangeQuery(part1, part2, startInclusive, endInclusive, settings.timeZone(), null);
} else {
rangeQuery = currentFieldType.rangeQuery(part1, part2, startInclusive, endInclusive);
}
return rangeQuery;
} catch (RuntimeException e) {
if (settings.lenient()) {
return null;
Query rangeQuery;
if (currentFieldType instanceof LegacyDateFieldMapper.DateFieldType && settings.timeZone() != null) {
LegacyDateFieldMapper.DateFieldType dateFieldType = (LegacyDateFieldMapper.DateFieldType) this.currentFieldType;
rangeQuery = dateFieldType.rangeQuery(part1, part2, startInclusive, endInclusive, settings.timeZone(), null);
} else if (currentFieldType instanceof DateFieldMapper.DateFieldType && settings.timeZone() != null) {
DateFieldMapper.DateFieldType dateFieldType = (DateFieldMapper.DateFieldType) this.currentFieldType;
rangeQuery = dateFieldType.rangeQuery(part1, part2, startInclusive, endInclusive, settings.timeZone(), null);
} else {
rangeQuery = currentFieldType.rangeQuery(part1, part2, startInclusive, endInclusive);
}
return rangeQuery;
} catch (RuntimeException e) {
if (settings.lenient()) {
return null;
}
throw e;
}
throw e;
}
return newRangeQuery(field, part1, part2, startInclusive, endInclusive);
} finally {
setAnalyzer(oldAnalyzer);
}
return newRangeQuery(field, part1, part2, startInclusive, endInclusive);
}

protected Query getFuzzyQuery(String field, String termStr, String minSimilarity) throws ParseException {
if (lowercaseExpandedTerms) {
termStr = termStr.toLowerCase(locale);
private String analyzeSingleToken(String field, String value) {
try (TokenStream tk = getAnalyzer().tokenStream(field, value)) {
CharTermAttribute termAtt = tk.addAttribute(CharTermAttribute.class);
tk.reset();
final String term;
if (tk.incrementToken()) {
term = termAtt.toString();
} else {
throw new IllegalStateException("Expected 1 token but got 0");
}
if (tk.incrementToken()) {
throw new IllegalStateException("Expected 1 token but got 2 or more");
}
return term;
} catch (IOException e) {
throw new IllegalStateException("Cannot happen", e);
}
}

protected Query getFuzzyQuery(String field, String termStr, String minSimilarity) throws ParseException {
Collection<String> fields = extractMultiFields(field);
if (fields != null) {
if (fields.size() == 1) {
@@ -396,19 +427,26 @@ protected Query getFuzzyQuery(String field, String termStr, String minSimilarity
}

private Query getFuzzyQuerySingle(String field, String termStr, String minSimilarity) throws ParseException {
currentFieldType = context.fieldMapper(field);
if (currentFieldType != null) {
try {
return currentFieldType.fuzzyQuery(termStr, Fuzziness.build(minSimilarity),
fuzzyPrefixLength, settings.fuzzyMaxExpansions(), FuzzyQuery.defaultTranspositions);
} catch (RuntimeException e) {
if (settings.lenient()) {
return null;
Analyzer oldAnalyzer = getAnalyzer();
try {
setMultiTermAnalyzer();
currentFieldType = context.fieldMapper(field);
if (currentFieldType != null) {
try {
termStr = termStr == null ? null : analyzeSingleToken(field, termStr);
return currentFieldType.fuzzyQuery(termStr, Fuzziness.build(minSimilarity),
fuzzyPrefixLength, settings.fuzzyMaxExpansions(), FuzzyQuery.defaultTranspositions);
} catch (RuntimeException e) {
if (settings.lenient()) {
return null;
}
throw e;
}
throw e;
}
return super.getFuzzyQuery(field, termStr, Float.parseFloat(minSimilarity));
} finally {
setAnalyzer(oldAnalyzer);
}
return super.getFuzzyQuery(field, termStr, Float.parseFloat(minSimilarity));
}

@Override
@@ -423,9 +461,6 @@ protected Query newFuzzyQuery(Term term, float minimumSimilarity, int prefixLeng

@Override
protected Query getPrefixQuery(String field, String termStr) throws ParseException {
if (lowercaseExpandedTerms) {
termStr = termStr.toLowerCase(locale);
}
Collection<String> fields = extractMultiFields(field);
if (fields != null) {
if (fields.size() == 1) {
@@ -467,9 +502,6 @@ private Query getPrefixQuerySingle(String field, String termStr) throws ParseExc
try {
currentFieldType = context.fieldMapper(field);
if (currentFieldType != null) {
if (!settings.forceAnalyzer()) {
setAnalyzer(context.getSearchAnalyzer(currentFieldType));
}
Query query = null;
if (currentFieldType.tokenized() == false) {
query = currentFieldType.prefixQuery(termStr, multiTermRewriteMethod, context);
@@ -491,8 +523,8 @@ private Query getPrefixQuerySingle(String field, String termStr) throws ParseExc
}

private Query getPossiblyAnalyzedPrefixQuery(String field, String termStr) throws ParseException {
if (!settings.analyzeWildcard()) {
return super.getPrefixQuery(field, termStr);
if (settings.analyzeWildcard() == false) {
setMultiTermAnalyzer();
}
List<List<String> > tlist;
// get Analyzer from superclass and tokenize the term
@@ -590,9 +622,6 @@ protected Query getWildcardQuery(String field, String termStr) throws ParseExcep
return FIELD_QUERY_EXTENSIONS.get(ExistsFieldQueryExtension.NAME).query(context, actualField);
}
}
if (lowercaseExpandedTerms) {
termStr = termStr.toLowerCase(locale);
}
Collection<String> fields = extractMultiFields(field);
if (fields != null) {
if (fields.size() == 1) {
@@ -629,19 +658,10 @@ protected Query getWildcardQuery(String field, String termStr) throws ParseExcep
}

private Query getWildcardQuerySingle(String field, String termStr) throws ParseException {
String indexedNameField = field;
currentFieldType = null;
Analyzer oldAnalyzer = getAnalyzer();
try {
currentFieldType = context.fieldMapper(field);
if (currentFieldType != null) {
if (!settings.forceAnalyzer()) {
setAnalyzer(context.getSearchAnalyzer(currentFieldType));
}
indexedNameField = currentFieldType.name();
return getPossiblyAnalyzedWildcardQuery(indexedNameField, termStr);
}
return getPossiblyAnalyzedWildcardQuery(indexedNameField, termStr);
return getPossiblyAnalyzedWildcardQuery(field, termStr);
} catch (RuntimeException e) {
if (settings.lenient()) {
return null;
@@ -653,8 +673,8 @@ private Query getWildcardQuerySingle(String field, String termStr) throws ParseE
}

private Query getPossiblyAnalyzedWildcardQuery(String field, String termStr) throws ParseException {
if (!settings.analyzeWildcard()) {
return super.getWildcardQuery(field, termStr);
if (settings.analyzeWildcard() == false) {
setMultiTermAnalyzer();
}
boolean isWithinToken = (!termStr.startsWith("?") && !termStr.startsWith("*"));
StringBuilder aggStr = new StringBuilder();
@@ -718,9 +738,6 @@ private Query getPossiblyAnalyzedWildcardQuery(String field, String termStr) thr

@Override
protected Query getRegexpQuery(String field, String termStr) throws ParseException {
if (lowercaseExpandedTerms) {
termStr = termStr.toLowerCase(locale);
}
Collection<String> fields = extractMultiFields(field);
if (fields != null) {
if (fields.size() == 1) {
@@ -760,11 +777,9 @@ private Query getRegexpQuerySingle(String field, String termStr) throws ParseExc
currentFieldType = null;
Analyzer oldAnalyzer = getAnalyzer();
try {
setMultiTermAnalyzer();
currentFieldType = context.fieldMapper(field);
if (currentFieldType != null) {
if (!settings.forceAnalyzer()) {
setAnalyzer(context.getSearchAnalyzer(currentFieldType));
}
Query query = null;
if (currentFieldType.tokenized() == false) {
query = currentFieldType.regexpQuery(termStr, RegExp.ALL,
