Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create only one MLT query per field for all queried items #6404

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Expand Up @@ -27,11 +27,13 @@
import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.similarities.TFIDFSimilarity;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.io.FastStringReader;

import java.io.IOException;
import java.io.Reader;
import java.util.Arrays;
import java.util.List;
import java.util.Set;

/**
Expand Down Expand Up @@ -174,13 +176,17 @@ public String[] getLikeTexts() {
}

/**
 * Sets a single like text.
 * <p>
 * The text is wrapped in a one-element array and handed to the array-based
 * {@code setLikeText} overload, so the field is written in exactly one place.
 *
 * @param likeText the text to use as the only like text
 */
public void setLikeText(String likeText) {
    // Passing an explicit String[] resolves to the varargs overload, not back to this method.
    setLikeText(new String[]{likeText});
}

/**
 * Replaces the like texts held by this object with the given array.
 *
 * @param likeText the like texts; the array reference is stored as-is, not copied
 */
public void setLikeText(String... likeText) {
this.likeText = likeText;
}

/**
 * Sets the like texts from a list.
 * <p>
 * The list is converted to a {@code String[]} and passed on to the array-based
 * {@code setLikeText} overload.
 *
 * @param likeText the like texts, in list order
 */
public void setLikeText(List<String> likeText) {
    String[] texts = likeText.toArray(Strings.EMPTY_ARRAY);
    setLikeText(texts);
}

/**
 * Returns the {@code moreLikeFields} array held by this object.
 *
 * @return the stored array itself (no copy is made)
 */
public String[] getMoreLikeFields() {
return moreLikeFields;
}
Expand Down
Expand Up @@ -20,6 +20,7 @@
package org.elasticsearch.index.query;

import com.google.common.collect.Lists;
import com.google.common.collect.ObjectArrays;
import com.google.common.collect.Sets;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.queries.TermsFilter;
Expand Down Expand Up @@ -207,9 +208,11 @@ public Query parse(QueryParseContext parseContext) throws IOException, QueryPars
}
// fetching the items with multi-get
List<LikeText> likeTexts = fetchService.fetch(items);
// collapse the text onto the same field name
Collection<LikeText> likeTextsCollapsed = collapseTextOnField(likeTexts);
// right now we are just building a boolean query
BooleanQuery boolQuery = new BooleanQuery();
for (LikeText likeText : likeTexts) {
for (LikeText likeText : likeTextsCollapsed) {
addMoreLikeThis(boolQuery, mltQuery, likeText);
}
// exclude the items from the search
Expand Down Expand Up @@ -260,6 +263,19 @@ private List<String> removeUnsupportedFields(List<String> moreLikeFields, Analyz
return moreLikeFields;
}

/**
 * Collapses like texts that share a field name into a single entry per field.
 * <p>
 * For every distinct {@code field}, the {@code text} arrays of all entries with
 * that field are concatenated in encounter order. The returned entries are ordered
 * by the first occurrence of each field in the input, so the result (and anything
 * built from it, such as query clauses) is deterministic.
 *
 * @param likeTexts the like texts to collapse
 * @return one {@code LikeText} per distinct field; this is a view of the values of
 *         the map built internally
 */
public static Collection<LikeText> collapseTextOnField(Collection<LikeText> likeTexts) {
    // Insertion-ordered map: a plain HashMap would make the output order depend on
    // the hash of the field names rather than on the order of the input.
    Map<String, LikeText> collapsedTexts = new java.util.LinkedHashMap<>();
    for (LikeText likeText : likeTexts) {
        String field = likeText.field;
        String[] text = likeText.text;
        // Values stored in the map are never null, so a single get() is enough
        // to detect an earlier entry for the same field.
        LikeText previous = collapsedTexts.get(field);
        if (previous != null) {
            text = ObjectArrays.concat(previous.text, text, String.class);
        }
        // Re-putting an existing key keeps its original position in the map.
        collapsedTexts.put(field, new LikeText(field, text));
    }
    return collapsedTexts.values();
}

/**
 * Filters the fields of a multi-get item through the list-based
 * {@code removeUnsupportedFields} overload and writes the result back to the item.
 *
 * @param item                    the item whose fields are filtered and replaced
 * @param analyzer                passed through to the list-based overload
 * @param failOnUnsupportedField  passed through to the list-based overload
 * @throws IOException if the list-based overload throws it
 */
private void removeUnsupportedFields(MultiGetRequest.Item item, Analyzer analyzer, boolean failOnUnsupportedField) throws IOException {
    List<String> supportedFields = removeUnsupportedFields(Arrays.asList(item.fields()), analyzer, failOnUnsupportedField);
    // Use the typed toArray: the no-arg toArray() returns Object[], and casting that
    // to String[] throws ClassCastException unless the runtime array happens to be a String[].
    item.fields(supportedFields.toArray(new String[0]));
}
Expand Down
Expand Up @@ -33,6 +33,7 @@
import org.elasticsearch.action.get.MultiGetRequest;
import org.elasticsearch.cache.recycler.CacheRecyclerModule;
import org.elasticsearch.cluster.ClusterService;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.compress.CompressedString;
import org.elasticsearch.common.inject.AbstractModule;
Expand Down Expand Up @@ -65,6 +66,7 @@
import org.elasticsearch.index.search.geo.GeoPolygonFilter;
import org.elasticsearch.index.search.geo.InMemoryGeoBoundingBoxFilter;
import org.elasticsearch.index.search.morelikethis.MoreLikeThisFetchService;
import org.elasticsearch.index.search.morelikethis.MoreLikeThisFetchService.LikeText;
import org.elasticsearch.index.settings.IndexSettingsModule;
import org.elasticsearch.index.similarity.SimilarityModule;
import org.elasticsearch.indices.fielddata.breaker.CircuitBreakerService;
Expand Down Expand Up @@ -1680,16 +1682,11 @@ public void testMoreLikeThisIds() throws Exception {
MoreLikeThisQueryParser parser = (MoreLikeThisQueryParser) queryParser.queryParser("more_like_this");
parser.setFetchService(new MockMoreLikeThisFetchService());

List<MoreLikeThisFetchService.LikeText> likeTexts = new ArrayList<>();
String index = "test";
String type = "person";
for (int i = 1; i < 5; i++) {
for (String field : new String[]{"name.first", "name.last"}) {
MoreLikeThisFetchService.LikeText likeText = new MoreLikeThisFetchService.LikeText(
field, index + " " + type + " " + i + " " + field);
likeTexts.add(likeText);
}
}
List<LikeText> likeTexts = new ArrayList<>();
likeTexts.add(new LikeText("name.first", new String[]{
"test person 1 name.first", "test person 2 name.first", "test person 3 name.first", "test person 4 name.first"}));
likeTexts.add(new LikeText("name.last", new String[]{
"test person 1 name.last", "test person 2 name.last", "test person 3 name.last", "test person 4 name.last"}));

IndexQueryParserService queryParser = queryParser();
String query = copyToStringFromClasspath("/org/elasticsearch/index/query/mlt-ids.json");
Expand All @@ -1700,7 +1697,7 @@ public void testMoreLikeThisIds() throws Exception {

// check the clause generated for each like text (one like text per field)
BooleanClause[] boolClauses = booleanQuery.getClauses();
for (int i=0; i<likeTexts.size(); i++) {
for (int i = 0; i < likeTexts.size(); i++) {
BooleanClause booleanClause = booleanQuery.getClauses()[i];
assertThat(booleanClause.getOccur(), is(BooleanClause.Occur.SHOULD));
assertThat(booleanClause.getQuery(), instanceOf(MoreLikeThisQuery.class));
Expand Down