Skip to content

Commit

Permalink
Search: Optimize (perf) execution of global facets, closes elastic#889.
Browse files Browse the repository at this point in the history
  • Loading branch information
kimchy committed Apr 28, 2011
1 parent 0ab8d1f commit 2c0bb91
Show file tree
Hide file tree
Showing 8 changed files with 325 additions and 19 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.elasticsearch.benchmark.search.facet;

import org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.search.SearchType;
import org.elasticsearch.client.Client;
import org.elasticsearch.client.Requests;
import org.elasticsearch.client.action.bulk.BulkRequestBuilder;
import org.elasticsearch.common.StopWatch;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.SizeValue;
import org.elasticsearch.common.util.concurrent.jsr166y.ThreadLocalRandom;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.node.Node;
import org.elasticsearch.search.facet.FacetBuilders;

import static org.elasticsearch.client.Requests.*;
import static org.elasticsearch.cluster.metadata.IndexMetaData.*;
import static org.elasticsearch.common.settings.ImmutableSettings.*;
import static org.elasticsearch.common.xcontent.XContentFactory.*;
import static org.elasticsearch.index.query.xcontent.QueryBuilders.*;
import static org.elasticsearch.node.NodeBuilder.*;

/**
 * Stand-alone benchmark that compares the reported search time of three request shapes
 * against the same term query on the {@code l_value} field:
 * <ol>
 * <li>a plain count search,</li>
 * <li>the same search with a query facet attached, and</li>
 * <li>the same search with the query facet marked as global.</li>
 * </ol>
 * It starts two data nodes and one client node in-process, indexes {@link #COUNT} documents
 * into the {@code test} index (unless creating/indexing fails, in which case it waits for
 * green cluster health instead), and prints the average {@code tookInMillis} over
 * {@link #QUERY_COUNT} executions of each request shape.
 */
public class QueryFilterFacetSearchBenchmark {

    /** Number of documents to index; overwritten with the actual index doc count before querying. */
    static long COUNT = SizeValue.parseSizeValue("1m").singles();
    /** Number of index requests per bulk request. */
    static int BATCH = 100;
    /** Number of times each search request shape is executed. */
    static int QUERY_COUNT = 200;
    /** Number of distinct random values generated for the {@code l_value} field. */
    static int NUMBER_OF_TERMS = 200;

    static Client client;

    /**
     * Runs the benchmark end to end and always closes the started nodes afterwards.
     *
     * @param args ignored
     * @throws Exception if node startup, the refresh/count calls, or any search fails
     */
    public static void main(String[] args) throws Exception {
        Settings settings = settingsBuilder()
                .put("index.refresh_interval", "-1")
                .put("gateway.type", "local")
                .put(SETTING_NUMBER_OF_SHARDS, 2)
                .put(SETTING_NUMBER_OF_REPLICAS, 0)
                .build();

        Node node1 = nodeBuilder().settings(settingsBuilder().put(settings).put("name", "node1")).node();
        Node node2 = nodeBuilder().settings(settingsBuilder().put(settings).put("name", "node2")).node();

        Node clientNode = nodeBuilder().settings(settingsBuilder().put(settings).put("name", "client")).client(true).node();

        try {
            client = clientNode.client();

            long[] lValues = new long[NUMBER_OF_TERMS];
            for (int i = 0; i < NUMBER_OF_TERMS; i++) {
                lValues[i] = ThreadLocalRandom.current().nextLong();
            }

            Thread.sleep(10000);
            try {
                client.admin().indices().create(createIndexRequest("test")).actionGet();
                indexDocuments(lValues);
            } catch (Exception e) {
                // Best effort: any failure here is treated as "the index is already there" and the
                // benchmark continues against whatever data exists. Report the actual cause as well,
                // so that unrelated failures are not silently mislabelled.
                System.out.println("--> Index already exists, ignoring indexing phase, waiting for green");
                System.err.println("--> Indexing phase skipped because of: " + e);
                ClusterHealthResponse clusterHealthResponse = client.admin().cluster().prepareHealth().setWaitForGreenStatus().setTimeout("10m").execute().actionGet();
                if (clusterHealthResponse.timedOut()) {
                    System.err.println("--> Timed out waiting for cluster health");
                }
            }
            // refresh_interval is -1, so an explicit refresh is needed before docs become searchable
            client.admin().indices().prepareRefresh().execute().actionGet();
            COUNT = client.prepareCount().setQuery(matchAllQuery()).execute().actionGet().count();
            System.out.println("--> Number of docs in index: " + COUNT);

            long totalQueryTime = 0;
            for (int j = 0; j < QUERY_COUNT; j++) {
                SearchResponse searchResponse = client.prepareSearch()
                        .setSearchType(SearchType.COUNT)
                        .setQuery(termQuery("l_value", lValues[0]))
                        .execute().actionGet();
                totalQueryTime += searchResponse.tookInMillis();
            }
            System.out.println("--> Simple Query on first l_value " + (totalQueryTime / QUERY_COUNT) + "ms");

            totalQueryTime = 0;
            for (int j = 0; j < QUERY_COUNT; j++) {
                SearchResponse searchResponse = client.prepareSearch()
                        .setSearchType(SearchType.COUNT)
                        .setQuery(termQuery("l_value", lValues[0]))
                        .addFacet(FacetBuilders.queryFacet("query").query(termQuery("l_value", lValues[0])))
                        .execute().actionGet();
                totalQueryTime += searchResponse.tookInMillis();
            }
            System.out.println("--> Query facet first l_value " + (totalQueryTime / QUERY_COUNT) + "ms");

            totalQueryTime = 0;
            for (int j = 0; j < QUERY_COUNT; j++) {
                SearchResponse searchResponse = client.prepareSearch()
                        .setSearchType(SearchType.COUNT)
                        .setQuery(termQuery("l_value", lValues[0]))
                        .addFacet(FacetBuilders.queryFacet("query").query(termQuery("l_value", lValues[0])).global(true))
                        .execute().actionGet();
                totalQueryTime += searchResponse.tookInMillis();
            }
            System.out.println("--> Query facet first l_value (global) " + (totalQueryTime / QUERY_COUNT) + "ms");
        } finally {
            // Shut the in-process cluster down so the benchmark does not leak nodes
            // (and their threads) once it has finished or failed.
            clientNode.close();
            node1.close();
            node2.close();
        }
    }

    /**
     * Bulk-indexes {@link #COUNT} documents into {@code test}/{@code type1}, {@link #BATCH} per bulk
     * request, cycling through {@code lValues} for the {@code l_value} field, and prints progress
     * every 10000 documents plus the overall throughput.
     *
     * @param lValues pool of values assigned to {@code l_value}, selected by {@code counter % lValues.length}
     * @throws Exception if building a document or executing a bulk request fails
     */
    private static void indexDocuments(long[] lValues) throws Exception {
        StopWatch stopWatch = new StopWatch().start();

        System.out.println("--> Indexing [" + COUNT + "] ...");
        long iterations = COUNT / BATCH;
        int counter = 0;
        for (long i = 1; i <= iterations; i++) {
            BulkRequestBuilder request = client.prepareBulk();
            for (int j = 0; j < BATCH; j++) {
                counter++;

                XContentBuilder builder = jsonBuilder().startObject();
                builder.field("id", Integer.toString(counter));
                builder.field("l_value", lValues[counter % lValues.length]);
                builder.endObject();

                request.add(Requests.indexRequest("test").type("type1").id(Integer.toString(counter))
                        .source(builder));
            }
            BulkResponse response = request.execute().actionGet();
            if (response.hasFailures()) {
                System.err.println("--> failures...");
            }
            if (((i * BATCH) % 10000) == 0) {
                System.out.println("--> Indexed " + (i * BATCH) + " took " + stopWatch.stop().lastTaskTime());
                stopWatch.start();
            }
        }
        System.out.println("--> Indexing took " + stopWatch.totalTime() + ", TPS " + (((double) (COUNT)) / stopWatch.totalTime().secondsFrac()));
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,10 @@ public AbstractFacetCollector(String facetName) {
this.facetName = facetName;
}

/**
 * Returns the filter currently set on this facet collector.
 *
 * @return the filter field's current value, or {@code null} if none has been set
 */
public Filter getFilter() {
    return filter;
}

@Override public void setFilter(Filter filter) {
if (this.filter == null) {
this.filter = filter;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,13 @@

package org.elasticsearch.search.facet;

import org.apache.lucene.search.FilteredQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.*;
import org.elasticsearch.ElasticSearchException;
import org.elasticsearch.common.collect.ImmutableList;
import org.elasticsearch.common.collect.ImmutableMap;
import org.elasticsearch.common.collect.Lists;
import org.elasticsearch.common.collect.Maps;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.lucene.search.NoopCollector;
import org.elasticsearch.common.lucene.search.Queries;
import org.elasticsearch.search.SearchParseElement;
import org.elasticsearch.search.SearchPhase;
Expand Down Expand Up @@ -67,22 +67,51 @@ public class FacetPhase implements SearchPhase {
return;
}

// run global facets ...
if (context.searcher().hasCollectors(ContextIndexSearcher.Scopes.GLOBAL)) {
Query query = Queries.MATCH_ALL_QUERY;
if (context.types().length > 0) {
query = new FilteredQuery(query, context.filterCache().cache(context.mapperService().typesFilter(context.types())));
}
// optimize global facet execution, based on filters (don't iterate over all docs), and check
// if we have special facets that can be optimized for all execution, do it
List<Collector> collectors = context.searcher().removeCollectors(ContextIndexSearcher.Scopes.GLOBAL);

context.searcher().processingScope(ContextIndexSearcher.Scopes.GLOBAL);
try {
context.searcher().search(query, NoopCollector.NOOP_COLLECTOR);
} catch (IOException e) {
throw new QueryPhaseExecutionException(context, "Failed to execute global facets", e);
} finally {
context.searcher().processedScope();
if (collectors != null && !collectors.isEmpty()) {
Map<Filter, List<Collector>> filtersByCollector = Maps.newHashMap();
for (Collector collector : collectors) {
if (collector instanceof OptimizeGlobalFacetCollector) {
try {
((OptimizeGlobalFacetCollector) collector).optimizedGlobalExecution(context);
} catch (IOException e) {
throw new QueryPhaseExecutionException(context, "Failed to execute global facets", e);
}
} else {
Filter filter = Queries.MATCH_ALL_FILTER;
if (collector instanceof AbstractFacetCollector) {
AbstractFacetCollector facetCollector = (AbstractFacetCollector) collector;
if (facetCollector.getFilter() != null) {
filter = facetCollector.getFilter();
}
}
List<Collector> list = filtersByCollector.get(filter);
if (list == null) {
    // The list must be mutable: every further collector that shares this filter is
    // appended to it, and ImmutableList.add(..) always throws UnsupportedOperationException.
    list = Lists.newArrayList();
    filtersByCollector.put(filter, list);
}
list.add(collector);
}
}
// now, go and execute the filters->collector ones
for (Map.Entry<Filter, List<Collector>> entry : filtersByCollector.entrySet()) {
Filter filter = entry.getKey();
Query query = new DeletionAwareConstantScoreQuery(filter);
if (context.types().length > 0) {
query = new FilteredQuery(query, context.filterCache().cache(context.mapperService().typesFilter(context.types())));
}
try {
context.searcher().search(query, MultiCollector.wrap(entry.getValue().toArray(new Collector[entry.getValue().size()])));
} catch (IOException e) {
throw new QueryPhaseExecutionException(context, "Failed to execute global facets", e);
}
}
}

SearchContextFacets contextFacets = context.facets();

List<Facet> facets = Lists.newArrayListWithCapacity(2);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.elasticsearch.search.facet;

import org.elasticsearch.search.internal.SearchContext;

import java.io.IOException;

/**
 * Implemented by facet collectors that can compute their global-scope result on their own,
 * given the search context, instead of being fed documents one by one as a regular collector.
 */
public interface OptimizeGlobalFacetCollector {

/**
 * Executes this facet in its optimized, global form.
 *
 * @param searchContext the context of the search request being executed
 * @throws IOException if the underlying execution fails
 */
void optimizedGlobalExecution(SearchContext searchContext) throws IOException;
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,19 +20,21 @@
package org.elasticsearch.search.facet.filter;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.*;
import org.elasticsearch.common.lucene.docset.DocSet;
import org.elasticsearch.common.lucene.docset.DocSets;
import org.elasticsearch.index.cache.filter.FilterCache;
import org.elasticsearch.search.facet.AbstractFacetCollector;
import org.elasticsearch.search.facet.Facet;
import org.elasticsearch.search.facet.OptimizeGlobalFacetCollector;
import org.elasticsearch.search.internal.SearchContext;

import java.io.IOException;

/**
* @author kimchy (shay.banon)
*/
public class FilterFacetCollector extends AbstractFacetCollector {
public class FilterFacetCollector extends AbstractFacetCollector implements OptimizeGlobalFacetCollector {

private final Filter filter;

Expand All @@ -45,6 +47,19 @@ public FilterFacetCollector(String facetName, Filter filter, FilterCache filterC
this.filter = filter;
}

/**
 * Computes this facet's count with a single hit-counting search instead of per-document
 * collection. The facet's own filter is wrapped in a {@link DeletionAwareConstantScoreQuery},
 * then narrowed by the collector-level filter (when one is set) and by the types filter
 * (when the search context names any types).
 *
 * @param searchContext the context of the search request being executed
 * @throws IOException if the search fails
 */
@Override public void optimizedGlobalExecution(SearchContext searchContext) throws IOException {
    final Filter collectorFilter = super.filter;
    final Query facetQuery = new DeletionAwareConstantScoreQuery(filter);

    Query countQuery = collectorFilter == null ? facetQuery : new FilteredQuery(facetQuery, collectorFilter);
    if (searchContext.types().length != 0) {
        Filter typesFilter = searchContext.filterCache().cache(searchContext.mapperService().typesFilter(searchContext.types()));
        countQuery = new FilteredQuery(countQuery, typesFilter);
    }

    TotalHitCountCollector hitCounter = new TotalHitCountCollector();
    searchContext.searcher().search(countQuery, hitCounter);
    count = hitCounter.getTotalHits();
}

/**
 * Recomputes the facet filter's doc set for the given reader and stores it in {@code docSet}.
 *
 * @param reader  the reader that is about to be collected
 * @param docBase the doc id base of that reader (not used here)
 * @throws IOException if the filter's doc id set cannot be obtained
 */
@Override protected void doSetNextReader(IndexReader reader, int docBase) throws IOException {
    this.docSet = DocSets.convert(reader, this.filter.getDocIdSet(reader));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,17 @@
import org.elasticsearch.index.cache.filter.FilterCache;
import org.elasticsearch.search.facet.AbstractFacetCollector;
import org.elasticsearch.search.facet.Facet;
import org.elasticsearch.search.facet.OptimizeGlobalFacetCollector;
import org.elasticsearch.search.internal.SearchContext;

import java.io.IOException;

/**
* @author kimchy (shay.banon)
*/
public class QueryFacetCollector extends AbstractFacetCollector {
public class QueryFacetCollector extends AbstractFacetCollector implements OptimizeGlobalFacetCollector {

private final Query query;

private final Filter filter;

Expand All @@ -43,6 +47,7 @@ public class QueryFacetCollector extends AbstractFacetCollector {

public QueryFacetCollector(String facetName, Query query, FilterCache filterCache) {
super(facetName);
this.query = query;
Filter possibleFilter = extractFilterIfApplicable(query);
if (possibleFilter != null) {
this.filter = possibleFilter;
Expand All @@ -61,6 +66,19 @@ public QueryFacetCollector(String facetName, Query query, FilterCache filterCach
}
}

/**
 * Computes this facet's count with a single hit-counting search instead of per-document
 * collection. The facet query is narrowed by the collector-level filter (when one is set)
 * and by the types filter (when the search context names any types).
 *
 * @param searchContext the context of the search request being executed
 * @throws IOException if the search fails
 */
@Override public void optimizedGlobalExecution(SearchContext searchContext) throws IOException {
    final Filter collectorFilter = super.filter;

    Query countQuery = collectorFilter == null ? this.query : new FilteredQuery(this.query, collectorFilter);
    if (searchContext.types().length != 0) {
        Filter typesFilter = searchContext.filterCache().cache(searchContext.mapperService().typesFilter(searchContext.types()));
        countQuery = new FilteredQuery(countQuery, typesFilter);
    }

    TotalHitCountCollector hitCounter = new TotalHitCountCollector();
    searchContext.searcher().search(countQuery, hitCounter);
    count = hitCounter.getTotalHits();
}

/**
 * Builds the facet result from this collector's name and its current count.
 *
 * @return a new {@link InternalQueryFacet} carrying {@code facetName} and {@code count}
 */
@Override public Facet facet() {
    return new InternalQueryFacet(this.facetName, this.count);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,10 @@ public void addCollector(String scope, Collector collector) {
collectors.add(collector);
}

/**
 * Removes and returns the collectors registered under the given scope.
 *
 * @param scope the scope whose collectors should be detached from this searcher
 * @return the collectors that were registered for {@code scope}, or {@code null} if there
 *         were none (including when no scoped collector has been registered at all)
 */
public List<Collector> removeCollectors(String scope) {
    // scopeCollectors may still be null (hasCollectors guards against the same case),
    // so avoid dereferencing it when nothing was ever registered.
    if (scopeCollectors == null) {
        return null;
    }
    return scopeCollectors.remove(scope);
}

public boolean hasCollectors(String scope) {
if (scopeCollectors == null) {
return false;
Expand Down

0 comments on commit 2c0bb91

Please sign in to comment.