Skip to content

Commit

Permalink
Add match query support for stacked tokens
Browse files Browse the repository at this point in the history
SynonymFilters produce token streams with stacked tokens, so
conjunction queries need to be parsed in a special way such that the
stacked tokens are added as an inner disjunction.

Closes #3881
  • Loading branch information
s1monw committed Oct 14, 2013
1 parent f4233ac commit dcef69b
Show file tree
Hide file tree
Showing 3 changed files with 170 additions and 3 deletions.
23 changes: 23 additions & 0 deletions src/main/java/org/elasticsearch/index/search/MatchQuery.java
Expand Up @@ -27,6 +27,7 @@
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.ExtendedCommonTermsQuery;
import org.apache.lucene.search.*;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.UnicodeUtil;
import org.elasticsearch.ElasticSearchIllegalArgumentException;
Expand Down Expand Up @@ -241,6 +242,28 @@ public Query parse(Type type, String fieldName, Object value) throws IOException
q.add(new Term(field, termToByteRef(termAtt)));
}
return wrapSmartNameQuery(q, smartNameFieldMappers, parseContext);
} if (severalTokensAtSamePosition && occur == Occur.MUST) {
BooleanQuery q = new BooleanQuery(positionCount == 1);
Query currentQuery = null;
for (int i = 0; i < numTokens; i++) {
boolean hasNext = buffer.incrementToken();
assert hasNext == true;
if (posIncrAtt != null && posIncrAtt.getPositionIncrement() == 0) {
if (!(currentQuery instanceof BooleanQuery)) {
Query t = currentQuery;
currentQuery = new BooleanQuery(true);
((BooleanQuery)currentQuery).add(t, BooleanClause.Occur.SHOULD);
}
((BooleanQuery)currentQuery).add(newTermQuery(mapper, new Term(field, termToByteRef(termAtt))), BooleanClause.Occur.SHOULD);
} else {
if (currentQuery != null) {
q.add(currentQuery, occur);
}
currentQuery = newTermQuery(mapper, new Term(field, termToByteRef(termAtt)));
}
}
q.add(currentQuery, occur);
return wrapSmartNameQuery(q, smartNameFieldMappers, parseContext);
} else {
BooleanQuery q = new BooleanQuery(positionCount == 1);
for (int i = 0; i < numTokens; i++) {
Expand Down
Expand Up @@ -40,9 +40,7 @@
import static org.elasticsearch.index.query.FilterBuilders.*;
import static org.elasticsearch.index.query.QueryBuilders.*;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.*;
import static org.hamcrest.Matchers.allOf;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.*;

/**
*
Expand Down
146 changes: 146 additions & 0 deletions src/test/java/org/elasticsearch/search/query/SimpleQueryTests.java
Expand Up @@ -21,11 +21,13 @@

import org.apache.lucene.util.English;
import org.elasticsearch.ElasticSearchException;
import org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder;
import org.elasticsearch.action.index.IndexRequestBuilder;
import org.elasticsearch.action.search.SearchPhaseExecutionException;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.search.SearchType;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.index.query.*;
import org.elasticsearch.index.query.CommonTermsQueryBuilder.Operator;
Expand All @@ -44,6 +46,9 @@
import java.util.Random;
import java.util.concurrent.ExecutionException;

import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_REPLICAS;
import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_SHARDS;
import static org.elasticsearch.common.settings.ImmutableSettings.settingsBuilder;
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.elasticsearch.index.query.FilterBuilders.*;
import static org.elasticsearch.index.query.QueryBuilders.*;
Expand Down Expand Up @@ -1618,4 +1623,145 @@ public void testMultiFieldQueryString() {
assertHitCount(client().prepareSearch("test").setQuery(queryString("/value[01]/").field("field1").field("field2")).get(), 1);
assertHitCount(client().prepareSearch("test").setQuery(queryString("field\\*:/value[01]/")).get(), 1);
}

/**
 * Checks that a match query using the AND operator finds documents when the
 * search-time analyzer applies a synonym filter ("fast, quick") that the
 * index-time analyzer does not. See #3881 for an extensive description of the issue.
 */
@Test
public void testMatchQueryWithSynonyms() throws IOException {
    // Mapping: the "text" field is indexed with the "index" analyzer and searched with the "search" analyzer.
    XContentBuilder typeMapping = jsonBuilder()
            .startObject()
                .startObject("test")
                    .startObject("properties")
                        .startObject("text")
                            .field("type", "string")
                            .field("index_analyzer", "index")
                            .field("search_analyzer", "search")
                        .endObject()
                    .endObject()
                .endObject()
            .endObject();

    // Single shard, no replicas; only the search analyzer carries the synonym filter.
    assertAcked(prepareCreate("test")
            .setSettings(ImmutableSettings.settingsBuilder()
                    .put(SETTING_NUMBER_OF_SHARDS, 1)
                    .put(SETTING_NUMBER_OF_REPLICAS, 0)
                    .put("index.analysis.analyzer.index.type", "custom")
                    .put("index.analysis.analyzer.index.tokenizer", "standard")
                    .put("index.analysis.analyzer.index.filter", "lowercase")
                    .put("index.analysis.analyzer.search.type", "custom")
                    .put("index.analysis.analyzer.search.tokenizer", "standard")
                    .putArray("index.analysis.analyzer.search.filter", "lowercase", "synonym")
                    .put("index.analysis.filter.synonym.type", "synonym")
                    .putArray("index.analysis.filter.synonym.synonyms", "fast, quick"))
            .addMapping("test", typeMapping));
    ensureGreen();

    // Phase 1: only the "quick" document exists, so each query must hit exactly one document.
    client().prepareIndex("test", "test", "1")
            .setSource(jsonBuilder().startObject().field("text", "quick brown fox").endObject())
            .execute().actionGet();
    client().admin().indices().prepareRefresh().execute().actionGet();
    for (String queryText : new String[] {"quick", "quick brown", "fast"}) {
        SearchResponse response = client().prepareSearch("test")
                .setQuery(matchQuery("text", queryText).operator(MatchQueryBuilder.Operator.AND))
                .get();
        assertHitCount(response, 1);
    }

    // Phase 2: add the "fast" variant; the same queries must now hit both documents.
    client().prepareIndex("test", "test", "2")
            .setSource(jsonBuilder().startObject().field("text", "fast brown fox").endObject())
            .execute().actionGet();
    client().admin().indices().prepareRefresh().execute().actionGet();
    for (String queryText : new String[] {"quick", "quick brown"}) {
        SearchResponse response = client().prepareSearch("test")
                .setQuery(matchQuery("text", queryText).operator(MatchQueryBuilder.Operator.AND))
                .get();
        assertHitCount(response, 2);
    }
}

/**
 * Checks that a match query using the AND operator finds documents when the
 * search-time analyzer chain is lowercase, keyword_repeat, porterStem and a
 * "unique" filter limited to tokens on the same position.
 * NOTE(review): this chain presumably emits the original term stacked with its
 * stem ("runs"/"run") - confirm against the analysis filter documentation.
 */
@Test
public void testMatchQueryWithStackedStems() throws IOException {
    // Mapping: the "text" field is indexed with the "index" analyzer and searched with the "search" analyzer.
    XContentBuilder typeMapping = jsonBuilder()
            .startObject()
                .startObject("test")
                    .startObject("properties")
                        .startObject("text")
                            .field("type", "string")
                            .field("index_analyzer", "index")
                            .field("search_analyzer", "search")
                        .endObject()
                    .endObject()
                .endObject()
            .endObject();

    // Single shard, no replicas; stemming is configured on the search analyzer only.
    assertAcked(prepareCreate("test")
            .setSettings(ImmutableSettings.settingsBuilder()
                    .put(SETTING_NUMBER_OF_SHARDS, 1)
                    .put(SETTING_NUMBER_OF_REPLICAS, 0)
                    .put("index.analysis.analyzer.index.type", "custom")
                    .put("index.analysis.analyzer.index.tokenizer", "standard")
                    .put("index.analysis.analyzer.index.filter", "lowercase")
                    .put("index.analysis.analyzer.search.type", "custom")
                    .put("index.analysis.analyzer.search.tokenizer", "standard")
                    .putArray("index.analysis.analyzer.search.filter", "lowercase", "keyword_repeat", "porterStem", "unique_stem")
                    .put("index.analysis.filter.unique_stem.type", "unique")
                    .put("index.analysis.filter.unique_stem.only_on_same_position", true))
            .addMapping("test", typeMapping));
    ensureGreen();

    // The same conjunction query is issued before and after the second document is added.
    MatchQueryBuilder foxRuns = matchQuery("text", "fox runs").operator(MatchQueryBuilder.Operator.AND);

    // Document 1 contains the unstemmed form "runs".
    client().prepareIndex("test", "test", "1")
            .setSource(jsonBuilder().startObject().field("text", "the fox runs across the street").endObject())
            .execute().actionGet();
    client().admin().indices().prepareRefresh().execute().actionGet();
    assertHitCount(client().prepareSearch("test").setQuery(foxRuns).get(), 1);

    // Document 2 contains only "run"; the query must now hit both documents.
    client().prepareIndex("test", "test", "2")
            .setSource(jsonBuilder().startObject().field("text", "run fox run").endObject())
            .execute().actionGet();
    client().admin().indices().prepareRefresh().execute().actionGet();
    assertHitCount(client().prepareSearch("test").setQuery(foxRuns).get(), 2);
}

/**
 * Checks that a query_string query with AND as the default operator finds
 * documents when the search-time analyzer applies a synonym filter
 * ("fast, quick") that the index-time analyzer does not.
 *
 * @throws IOException if building the mapping or a document source fails
 */
@Test
public void testQueryStringWithSynonyms() throws IOException {
    CreateIndexRequestBuilder builder = prepareCreate("test").setSettings(settingsBuilder()
            .put(SETTING_NUMBER_OF_SHARDS, 1)
            .put(SETTING_NUMBER_OF_REPLICAS, 0)
            .put("index.analysis.analyzer.index.type", "custom")
            .put("index.analysis.analyzer.index.tokenizer", "standard")
            .put("index.analysis.analyzer.index.filter", "lowercase")
            .put("index.analysis.analyzer.search.type", "custom")
            .put("index.analysis.analyzer.search.tokenizer", "standard")
            // Only the search analyzer carries the synonym filter.
            .putArray("index.analysis.analyzer.search.filter", "lowercase", "synonym")
            .put("index.analysis.filter.synonym.type", "synonym")
            .putArray("index.analysis.filter.synonym.synonyms", "fast, quick"));

    // The "text" field is indexed with the "index" analyzer and searched with the "search" analyzer.
    XContentBuilder mapping = XContentFactory.jsonBuilder().startObject().startObject("test")
            .startObject("properties")
            .startObject("text")
            .field("type", "string")
            .field("index_analyzer", "index")
            .field("search_analyzer", "search")
            .endObject()
            .endObject()
            .endObject().endObject();
    assertAcked(builder.addMapping("test", mapping));
    ensureGreen();

    // Phase 1: only the "quick" document exists, so each query must hit exactly one document.
    client().prepareIndex("test", "test", "1").setSource(jsonBuilder().startObject()
            .field("text", "quick brown fox")
            .endObject())
            .execute().actionGet();
    client().admin().indices().prepareRefresh().execute().actionGet();
    SearchResponse searchResponse = client().prepareSearch("test").setQuery(QueryBuilders.queryString("quick").defaultField("text").defaultOperator(QueryStringQueryBuilder.Operator.AND)).get();
    assertHitCount(searchResponse, 1);
    searchResponse = client().prepareSearch("test").setQuery(QueryBuilders.queryString("quick brown").defaultField("text").defaultOperator(QueryStringQueryBuilder.Operator.AND)).get();
    assertHitCount(searchResponse, 1);
    // Restrict this search to the "test" index like every other search in this test,
    // so the hit count cannot be influenced by other indices in the cluster.
    searchResponse = client().prepareSearch("test").setQuery(QueryBuilders.queryString("fast").defaultField("text").defaultOperator(QueryStringQueryBuilder.Operator.AND)).get();
    assertHitCount(searchResponse, 1);

    // Phase 2: add the "fast" variant; the same queries must now hit both documents.
    client().prepareIndex("test", "test", "2").setSource(jsonBuilder().startObject()
            .field("text", "fast brown fox")
            .endObject())
            .execute().actionGet();
    client().admin().indices().prepareRefresh().execute().actionGet();
    searchResponse = client().prepareSearch("test").setQuery(QueryBuilders.queryString("quick").defaultField("text").defaultOperator(QueryStringQueryBuilder.Operator.AND)).get();
    assertHitCount(searchResponse, 2);
    searchResponse = client().prepareSearch("test").setQuery(QueryBuilders.queryString("quick brown").defaultField("text").defaultOperator(QueryStringQueryBuilder.Operator.AND)).get();
    assertHitCount(searchResponse, 2);
}
}

0 comments on commit dcef69b

Please sign in to comment.