Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Expose Lucene's new TopTermsBlendedFreqScoringRewrite. #12129

Merged
merged 1 commit into from
Jul 8, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ public Query parse(QueryParseContext parseContext) throws IOException, QueryPars
} else if ("transpositions".equals(currentFieldName)) {
transpositions = parser.booleanValue();
} else if ("rewrite".equals(currentFieldName)) {
rewriteMethod = QueryParsers.parseRewriteMethod(parser.textOrNull(), null);
rewriteMethod = QueryParsers.parseRewriteMethod(parseContext.parseFieldMatcher(), parser.textOrNull(), null);
} else if ("_name".equals(currentFieldName)) {
queryName = parser.text();
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -124,9 +124,9 @@ public Query parse(QueryParseContext parseContext) throws IOException, QueryPars
} else if ("minimum_should_match".equals(currentFieldName) || "minimumShouldMatch".equals(currentFieldName)) {
minimumShouldMatch = parser.textOrNull();
} else if ("rewrite".equals(currentFieldName)) {
matchQuery.setRewriteMethod(QueryParsers.parseRewriteMethod(parser.textOrNull(), null));
matchQuery.setRewriteMethod(QueryParsers.parseRewriteMethod(parseContext.parseFieldMatcher(), parser.textOrNull(), null));
} else if ("fuzzy_rewrite".equals(currentFieldName) || "fuzzyRewrite".equals(currentFieldName)) {
matchQuery.setFuzzyRewriteMethod(QueryParsers.parseRewriteMethod(parser.textOrNull(), null));
matchQuery.setFuzzyRewriteMethod(QueryParsers.parseRewriteMethod(parseContext.parseFieldMatcher(), parser.textOrNull(), null));
} else if ("fuzzy_transpositions".equals(currentFieldName)) {
matchQuery.setTranspositions(parser.booleanValue());
} else if ("lenient".equals(currentFieldName)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,9 +114,9 @@ public Query parse(QueryParseContext parseContext) throws IOException, QueryPars
} else if ("minimum_should_match".equals(currentFieldName) || "minimumShouldMatch".equals(currentFieldName)) {
minimumShouldMatch = parser.textOrNull();
} else if ("rewrite".equals(currentFieldName)) {
multiMatchQuery.setRewriteMethod(QueryParsers.parseRewriteMethod(parser.textOrNull(), null));
multiMatchQuery.setRewriteMethod(QueryParsers.parseRewriteMethod(parseContext.parseFieldMatcher(), parser.textOrNull(), null));
} else if ("fuzzy_rewrite".equals(currentFieldName) || "fuzzyRewrite".equals(currentFieldName)) {
multiMatchQuery.setFuzzyRewriteMethod(QueryParsers.parseRewriteMethod(parser.textOrNull(), null));
multiMatchQuery.setFuzzyRewriteMethod(QueryParsers.parseRewriteMethod(parseContext.parseFieldMatcher(), parser.textOrNull(), null));
} else if ("use_dis_max".equals(currentFieldName) || "useDisMax".equals(currentFieldName)) {
useDisMax = parser.booleanValue();
} else if ("tie_breaker".equals(currentFieldName) || "tieBreaker".equals(currentFieldName)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ public Query parse(QueryParseContext parseContext) throws IOException, QueryPars
throw new QueryParsingException(parseContext, "No value specified for prefix query");
}

MultiTermQuery.RewriteMethod method = QueryParsers.parseRewriteMethod(rewriteMethod, null);
MultiTermQuery.RewriteMethod method = QueryParsers.parseRewriteMethod(parseContext.parseFieldMatcher(), rewriteMethod, null);

Query query = null;
MappedFieldType fieldType = parseContext.fieldMapper(fieldName);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ public Query parse(QueryParseContext parseContext) throws IOException, QueryPars
} else if ("fuzzy_max_expansions".equals(currentFieldName) || "fuzzyMaxExpansions".equals(currentFieldName)) {
qpSettings.fuzzyMaxExpansions(parser.intValue());
} else if ("fuzzy_rewrite".equals(currentFieldName) || "fuzzyRewrite".equals(currentFieldName)) {
qpSettings.fuzzyRewriteMethod(QueryParsers.parseRewriteMethod(parser.textOrNull()));
qpSettings.fuzzyRewriteMethod(QueryParsers.parseRewriteMethod(parseContext.parseFieldMatcher(), parser.textOrNull()));
} else if ("phrase_slop".equals(currentFieldName) || "phraseSlop".equals(currentFieldName)) {
qpSettings.phraseSlop(parser.intValue());
} else if (parseContext.parseFieldMatcher().match(currentFieldName, FUZZINESS)) {
Expand All @@ -187,7 +187,7 @@ public Query parse(QueryParseContext parseContext) throws IOException, QueryPars
} else if ("analyze_wildcard".equals(currentFieldName) || "analyzeWildcard".equals(currentFieldName)) {
qpSettings.analyzeWildcard(parser.booleanValue());
} else if ("rewrite".equals(currentFieldName)) {
qpSettings.rewriteMethod(QueryParsers.parseRewriteMethod(parser.textOrNull()));
qpSettings.rewriteMethod(QueryParsers.parseRewriteMethod(parseContext.parseFieldMatcher(), parser.textOrNull()));
} else if ("minimum_should_match".equals(currentFieldName) || "minimumShouldMatch".equals(currentFieldName)) {
qpSettings.minimumShouldMatch(parser.textOrNull());
} else if ("quote_field_suffix".equals(currentFieldName) || "quoteFieldSuffix".equals(currentFieldName)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ public Query parse(QueryParseContext parseContext) throws IOException, QueryPars
throw new QueryParsingException(parseContext, "No value specified for regexp query");
}

MultiTermQuery.RewriteMethod method = QueryParsers.parseRewriteMethod(rewriteMethod, null);
MultiTermQuery.RewriteMethod method = QueryParsers.parseRewriteMethod(parseContext.parseFieldMatcher(), rewriteMethod, null);

Query query = null;
MappedFieldType fieldType = parseContext.fieldMapper(fieldName);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.query.support.QueryParsers;

Expand Down Expand Up @@ -103,8 +102,8 @@ public Query parse(QueryParseContext parseContext) throws IOException, QueryPars
}

WildcardQuery wildcardQuery = new WildcardQuery(new Term(fieldName, valueBytes));
QueryParsers.setRewriteMethod(wildcardQuery, rewriteMethod);
wildcardQuery.setRewriteMethod(QueryParsers.parseRewriteMethod(rewriteMethod));
QueryParsers.setRewriteMethod(wildcardQuery, parseContext.parseFieldMatcher(), rewriteMethod);
wildcardQuery.setRewriteMethod(QueryParsers.parseRewriteMethod(parseContext.parseFieldMatcher(), rewriteMethod));
wildcardQuery.setBoost(boost);
if (queryName != null) {
parseContext.addNamedQuery(queryName, wildcardQuery);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,22 @@
package org.elasticsearch.index.query.support;

import org.apache.lucene.search.MultiTermQuery;

import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.ParseFieldMatcher;

/**
*
*/
public final class QueryParsers {

private static final ParseField CONSTANT_SCORE = new ParseField("constant_score", "constant_score_auto", "constant_score_filter");
private static final ParseField SCORING_BOOLEAN = new ParseField("scoring_boolean");
private static final ParseField CONSTANT_SCORE_BOOLEAN = new ParseField("constant_score_boolean");
private static final ParseField TOP_TERMS = new ParseField("top_terms_");
private static final ParseField TOP_TERMS_BOOST = new ParseField("top_terms_boost_");
private static final ParseField TOP_TERMS_BLENDED_FREQS = new ParseField("top_terms_blended_freqs_");

private QueryParsers() {

}
Expand All @@ -39,50 +47,55 @@ public static void setRewriteMethod(MultiTermQuery query, @Nullable MultiTermQue
query.setRewriteMethod(rewriteMethod);
}

public static void setRewriteMethod(MultiTermQuery query, @Nullable String rewriteMethod) {
public static void setRewriteMethod(MultiTermQuery query, ParseFieldMatcher matcher, @Nullable String rewriteMethod) {
if (rewriteMethod == null) {
return;
}
query.setRewriteMethod(parseRewriteMethod(rewriteMethod));
query.setRewriteMethod(parseRewriteMethod(matcher, rewriteMethod));
}

public static MultiTermQuery.RewriteMethod parseRewriteMethod(@Nullable String rewriteMethod) {
return parseRewriteMethod(rewriteMethod, MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
public static MultiTermQuery.RewriteMethod parseRewriteMethod(ParseFieldMatcher matcher, @Nullable String rewriteMethod) {
return parseRewriteMethod(matcher, rewriteMethod, MultiTermQuery.CONSTANT_SCORE_REWRITE);
}

public static MultiTermQuery.RewriteMethod parseRewriteMethod(@Nullable String rewriteMethod, @Nullable MultiTermQuery.RewriteMethod defaultRewriteMethod) {
public static MultiTermQuery.RewriteMethod parseRewriteMethod(ParseFieldMatcher matcher, @Nullable String rewriteMethod, @Nullable MultiTermQuery.RewriteMethod defaultRewriteMethod) {
if (rewriteMethod == null) {
return defaultRewriteMethod;
}
if ("constant_score_auto".equals(rewriteMethod) || "constant_score_auto".equals(rewriteMethod)) {
return MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE;
}
if ("scoring_boolean".equals(rewriteMethod) || "scoringBoolean".equals(rewriteMethod)) {
return MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE;
}
if ("constant_score_boolean".equals(rewriteMethod) || "constantScoreBoolean".equals(rewriteMethod)) {
return MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE;
}
if ("constant_score_filter".equals(rewriteMethod) || "constantScoreFilter".equals(rewriteMethod)) {
return MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE;
if (matcher.match(rewriteMethod, CONSTANT_SCORE)) {
return MultiTermQuery.CONSTANT_SCORE_REWRITE;
}
if (rewriteMethod.startsWith("top_terms_boost_")) {
int size = Integer.parseInt(rewriteMethod.substring("top_terms_boost_".length()));
return new MultiTermQuery.TopTermsBoostOnlyBooleanQueryRewrite(size);
if (matcher.match(rewriteMethod, SCORING_BOOLEAN)) {
return MultiTermQuery.SCORING_BOOLEAN_REWRITE;
}
if (rewriteMethod.startsWith("topTermsBoost")) {
int size = Integer.parseInt(rewriteMethod.substring("topTermsBoost".length()));
return new MultiTermQuery.TopTermsBoostOnlyBooleanQueryRewrite(size);
if (matcher.match(rewriteMethod, CONSTANT_SCORE_BOOLEAN)) {
return MultiTermQuery.CONSTANT_SCORE_BOOLEAN_REWRITE;
}
if (rewriteMethod.startsWith("top_terms_")) {
int size = Integer.parseInt(rewriteMethod.substring("top_terms_".length()));
return new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(size);

int firstDigit = -1;
for (int i = 0; i < rewriteMethod.length(); ++i) {
if (Character.isDigit(rewriteMethod.charAt(i))) {
firstDigit = i;
break;
}
}
if (rewriteMethod.startsWith("topTerms")) {
int size = Integer.parseInt(rewriteMethod.substring("topTerms".length()));
return new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(size);

if (firstDigit >= 0) {
final int size = Integer.parseInt(rewriteMethod.substring(firstDigit));
String rewriteMethodName = rewriteMethod.substring(0, firstDigit);

if (matcher.match(rewriteMethodName, TOP_TERMS)) {
return new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(size);
}
if (matcher.match(rewriteMethodName, TOP_TERMS_BOOST)) {
return new MultiTermQuery.TopTermsBoostOnlyBooleanQueryRewrite(size);
}
if (matcher.match(rewriteMethodName, TOP_TERMS_BLENDED_FREQS)) {
return new MultiTermQuery.TopTermsBlendedFreqScoringRewrite(size);
}
}

throw new IllegalArgumentException("Failed to parse rewrite_method [" + rewriteMethod + "]");
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

import com.google.common.collect.Lists;
import com.google.common.collect.Sets;

import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.index.*;
import org.apache.lucene.index.memory.MemoryIndex;
Expand All @@ -29,6 +30,7 @@
import org.apache.lucene.queries.TermsQuery;
import org.apache.lucene.search.*;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.MultiTermQuery.RewriteMethod;
import org.apache.lucene.search.join.ToParentBlockJoinQuery;
import org.apache.lucene.search.spans.*;
import org.apache.lucene.spatial.prefix.IntersectsPrefixTreeFilter;
Expand Down Expand Up @@ -68,6 +70,7 @@
import org.junit.Test;

import java.io.IOException;
import java.util.Arrays;
import java.util.EnumSet;
import java.util.List;

Expand Down Expand Up @@ -428,6 +431,7 @@ public void testFuzzyQuery() throws IOException {
assertThat(parsedQuery, instanceOf(FuzzyQuery.class));
FuzzyQuery fuzzyQuery = (FuzzyQuery) parsedQuery;
assertThat(fuzzyQuery.getTerm(), equalTo(new Term("name.first", "sh")));
assertThat(fuzzyQuery.getRewriteMethod(), instanceOf(MultiTermQuery.TopTermsBlendedFreqScoringRewrite.class));
}

@Test
Expand Down Expand Up @@ -2423,4 +2427,16 @@ public void testTermsQueryFilter() throws Exception {
q = csq.getQuery();
assertThat(q, instanceOf(TermsQuery.class));
}

/**
 * Checks that the blended-frequency rewrite can be requested on a prefix query
 * using either the underscore or the camel-case spelling of the rewrite name.
 */
@Test
public void testBlendedRewriteMethod() throws IOException {
    final IndexQueryParserService parserService = queryParser();
    final List<String> rewriteNames = Arrays.asList("top_terms_blended_freqs_10", "topTermsBlendedFreqs10");
    final Term expectedPrefix = new Term("field", "val");
    for (final String rewriteName : rewriteNames) {
        // Build and parse a prefix query that carries the rewrite name under test.
        final Query parsed = parserService.parse(prefixQuery("field", "val").rewrite(rewriteName)).query();
        assertThat(parsed, instanceOf(PrefixQuery.class));

        // The parsed query must keep the prefix term and use the blended-freqs rewrite.
        final PrefixQuery asPrefix = (PrefixQuery) parsed;
        assertThat(asPrefix.getPrefix(), equalTo(expectedPrefix));
        assertThat(asPrefix.getRewriteMethod(), instanceOf(MultiTermQuery.TopTermsBlendedFreqScoringRewrite.class));
    }
}
}
19 changes: 11 additions & 8 deletions docs/reference/query-dsl/multi-term-rewrite.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,11 @@ also happens on the
All of those queries allow you to control how they are rewritten using
the `rewrite` parameter:

* When not set, or set to `constant_score_auto`, defaults to
automatically choosing either `constant_score_boolean` or
`constant_score_filter` based on query characteristics.
* `constant_score` (default): A rewrite method that performs like
`constant_score_boolean` when there are few matching terms and otherwise
visits all matching terms in sequence and marks documents for that term.
Matching documents are assigned a constant score equal to the query's
boost.
* `scoring_boolean`: A rewrite method that first translates each term
into a should clause in a boolean query, and keeps the scores as
computed by the query. Note that typically such scores are meaningless
Expand All @@ -25,10 +27,6 @@ are not computed. Instead, each matching document receives a constant
score equal to the query's boost. This rewrite method will fail with a
too-many-clauses error if it exceeds the boolean query limit (defaults to
`1024`).
* `constant_score_filter`: A rewrite method that first creates a private
Filter by visiting each term in sequence and marking all docs for that
term. Matching documents are assigned a constant score equal to the
query's boost.
* `top_terms_N`: A rewrite method that first translates each term into
a should clause in a boolean query, and keeps the scores as computed by the
query. This rewrite method only uses the top scoring terms so it will
Expand All @@ -39,4 +37,9 @@ into should clause in boolean query, but the scores are only computed as
the boost. This rewrite method only uses the top scoring terms so it
will not overflow the boolean max clause count. The `N` controls the
size of the top scoring terms to use.

* `top_terms_blended_freqs_N`: A rewrite method that first translates each
term into a should clause in a boolean query, but all term queries compute
scores as if they had the same frequency. In practice, the frequency used
is the maximum frequency of all matching terms. This rewrite method only uses
the top scoring terms, so it will not overflow the boolean max clause count.
The `N` controls the size of the top scoring terms to use.