Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Match query with operator and, cutoff_frequency and stacked tokens #6573

Merged
merged 1 commit into from Jun 25, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
7 changes: 0 additions & 7 deletions docs/reference/query-dsl/queries/match-query.asciidoc
Expand Up @@ -98,13 +98,6 @@ The `cutoff_frequency` can either be relative to the number of documents
in the index if in the range `[0..1)` or absolute if greater or equal to
`1.0`.

Note: If the `cutoff_frequency` is used and the operator is `and`
_stacked tokens_ (tokens that are on the same position like `synonym` filter emits)
are not handled gracefully as they are in a pure `and` query. For instance the query
`fast fox` is analyzed into 3 terms `[fast, quick, fox]` where `quick` is a synonym
for `fast` on the same token positions the query might require `fast` and `quick` to
match if the operator is `and`.

Here is an example showing a query composed of stopwords exclusivly:

[source,js]
Expand Down
Expand Up @@ -279,7 +279,7 @@ public Query createPhrasePrefixQuery(String field, String queryText, int phraseS
}

public Query createCommonTermsQuery(String field, String queryText, Occur highFreqOccur, Occur lowFreqOccur, float maxTermFrequency, FieldMapper<?> mapper) {
Query booleanQuery = createBooleanQuery(field, queryText, Occur.SHOULD);
Query booleanQuery = createBooleanQuery(field, queryText, lowFreqOccur);
if (booleanQuery != null && booleanQuery instanceof BooleanQuery) {
BooleanQuery bq = (BooleanQuery) booleanQuery;
ExtendedCommonTermsQuery query = new ExtendedCommonTermsQuery(highFreqOccur, lowFreqOccur, maxTermFrequency, ((BooleanQuery)booleanQuery).isCoordDisabled(), mapper);
Expand Down
102 changes: 100 additions & 2 deletions src/test/java/org/elasticsearch/search/query/SimpleQueryTests.java
Expand Up @@ -321,7 +321,7 @@ public void testCommonTermsQuery() throws Exception {

searchResponse = client().prepareSearch().setQuery(commonTerms("field1", "the quick brown").cutoffFrequency(3).analyzer("stop")).get();
assertHitCount(searchResponse, 3l);
// standard drops "the" since its a stopword
// stop drops "the" since its a stopword
assertFirstHit(searchResponse, hasId("1"));
assertSecondHit(searchResponse, hasId("3"));
assertThirdHit(searchResponse, hasId("2"));
Expand All @@ -340,7 +340,7 @@ public void testCommonTermsQuery() throws Exception {

searchResponse = client().prepareSearch().setQuery(matchQuery("field1", "the quick brown").cutoffFrequency(3).operator(MatchQueryBuilder.Operator.AND).analyzer("stop")).get();
assertHitCount(searchResponse, 3l);
// standard drops "the" since its a stopword
// stop drops "the" since its a stopword
assertFirstHit(searchResponse, hasId("1"));
assertSecondHit(searchResponse, hasId("3"));
assertThirdHit(searchResponse, hasId("2"));
Expand All @@ -353,6 +353,104 @@ public void testCommonTermsQuery() throws Exception {
assertThirdHit(searchResponse, hasId("2"));
}

@Test
public void testCommonTermsQueryStackedTokens() throws Exception {
assertAcked(prepareCreate("test")
.setSettings(settingsBuilder()
.put(indexSettings())
.put(SETTING_NUMBER_OF_SHARDS,1)
.put("index.analysis.filter.syns.type","synonym")
.putArray("index.analysis.filter.syns.synonyms","quick,fast")
.put("index.analysis.analyzer.syns.tokenizer","whitespace")
.put("index.analysis.analyzer.syns.filter","syns")
)
.addMapping("type1", "field1", "type=string,analyzer=syns", "field2", "type=string,analyzer=syns"));
ensureGreen();

indexRandom(true, client().prepareIndex("test", "type1", "3").setSource("field1", "quick lazy huge brown pidgin", "field2", "the quick lazy huge brown fox jumps over the tree"),
client().prepareIndex("test", "type1", "1").setSource("field1", "the quick brown fox"),
client().prepareIndex("test", "type1", "2").setSource("field1", "the quick lazy huge brown fox jumps over the tree") );

SearchResponse searchResponse = client().prepareSearch().setQuery(commonTerms("field1", "the fast brown").cutoffFrequency(3).lowFreqOperator(Operator.OR)).get();
assertHitCount(searchResponse, 3l);
assertFirstHit(searchResponse, hasId("1"));
assertSecondHit(searchResponse, hasId("2"));
assertThirdHit(searchResponse, hasId("3"));

searchResponse = client().prepareSearch().setQuery(commonTerms("field1", "the fast brown").cutoffFrequency(3).lowFreqOperator(Operator.AND)).get();
assertThat(searchResponse.getHits().totalHits(), equalTo(2l));
assertFirstHit(searchResponse, hasId("1"));
assertSecondHit(searchResponse, hasId("2"));

// Default
searchResponse = client().prepareSearch().setQuery(commonTerms("field1", "the fast brown").cutoffFrequency(3)).get();
assertHitCount(searchResponse, 3l);
assertFirstHit(searchResponse, hasId("1"));
assertSecondHit(searchResponse, hasId("2"));
assertThirdHit(searchResponse, hasId("3"));


searchResponse = client().prepareSearch().setQuery(commonTerms("field1", "the fast huge fox").lowFreqMinimumShouldMatch("3")).get();
assertHitCount(searchResponse, 1l);
assertFirstHit(searchResponse, hasId("2"));

searchResponse = client().prepareSearch().setQuery(commonTerms("field1", "the fast lazy fox brown").cutoffFrequency(1).highFreqMinimumShouldMatch("5")).get();
assertHitCount(searchResponse, 2l);
assertFirstHit(searchResponse, hasId("1"));
assertSecondHit(searchResponse, hasId("2"));

searchResponse = client().prepareSearch().setQuery(commonTerms("field1", "the fast lazy fox brown").cutoffFrequency(1).highFreqMinimumShouldMatch("6")).get();
assertHitCount(searchResponse, 1l);
assertFirstHit(searchResponse, hasId("2"));

searchResponse = client().prepareSearch().setQuery("{ \"common\" : { \"field1\" : { \"query\" : \"the fast lazy fox brown\", \"cutoff_frequency\" : 1, \"minimum_should_match\" : { \"high_freq\" : 6 } } } }").get();
assertHitCount(searchResponse, 1l);
assertFirstHit(searchResponse, hasId("2"));

// Default
searchResponse = client().prepareSearch().setQuery(commonTerms("field1", "the fast lazy fox brown").cutoffFrequency(1)).get();
assertHitCount(searchResponse, 1l);
assertFirstHit(searchResponse, hasId("2"));

searchResponse = client().prepareSearch().setQuery(commonTerms("field1", "the quick brown").cutoffFrequency(3).analyzer("stop")).get();
assertHitCount(searchResponse, 3l);
// stop drops "the" since its a stopword
assertFirstHit(searchResponse, hasId("1"));
assertSecondHit(searchResponse, hasId("3"));
assertThirdHit(searchResponse, hasId("2"));

// try the same with match query
searchResponse = client().prepareSearch().setQuery(matchQuery("field1", "the fast brown").cutoffFrequency(3).operator(MatchQueryBuilder.Operator.AND)).get();
assertHitCount(searchResponse, 2l);
assertFirstHit(searchResponse, hasId("1"));
assertSecondHit(searchResponse, hasId("2"));

searchResponse = client().prepareSearch().setQuery(matchQuery("field1", "the fast brown").cutoffFrequency(3).operator(MatchQueryBuilder.Operator.OR)).get();
assertHitCount(searchResponse, 3l);
assertFirstHit(searchResponse, hasId("1"));
assertSecondHit(searchResponse, hasId("2"));
assertThirdHit(searchResponse, hasId("3"));

searchResponse = client().prepareSearch().setQuery(matchQuery("field1", "the fast brown").cutoffFrequency(3).operator(MatchQueryBuilder.Operator.AND).analyzer("stop")).get();
assertHitCount(searchResponse, 3l);
// stop drops "the" since its a stopword
assertFirstHit(searchResponse, hasId("1"));
assertSecondHit(searchResponse, hasId("3"));
assertThirdHit(searchResponse, hasId("2"));

searchResponse = client().prepareSearch().setQuery(matchQuery("field1", "the fast brown").cutoffFrequency(3).minimumShouldMatch("3")).get();
assertHitCount(searchResponse, 2l);
assertFirstHit(searchResponse, hasId("1"));
assertSecondHit(searchResponse, hasId("2"));

// try the same with multi match query
searchResponse = client().prepareSearch().setQuery(multiMatchQuery("the fast brown", "field1", "field2").cutoffFrequency(3).operator(MatchQueryBuilder.Operator.AND)).get();
assertHitCount(searchResponse, 3l);
assertFirstHit(searchResponse, hasId("3")); // better score due to different query stats
assertSecondHit(searchResponse, hasId("1"));
assertThirdHit(searchResponse, hasId("2"));
}

@Test
public void testOmitTermFreqsAndPositions() throws Exception {
Version version = Version.CURRENT;
Expand Down