Skip to content

Commit

Permalink
More Like This Query: Added searching for multiple items.
Browse files Browse the repository at this point in the history
The syntax to specify one or more items is the same as for the Multi GET API.
If only one document is specified, the results returned are the same as when
using the More Like This API.

Relates #4075 Closes #5857
  • Loading branch information
alexksikes committed May 17, 2014
1 parent 7c0d8a4 commit 3d79986
Show file tree
Hide file tree
Showing 15 changed files with 939 additions and 130 deletions.
2 changes: 1 addition & 1 deletion docs/reference/docs/multi-get.asciidoc
Expand Up @@ -106,7 +106,7 @@ curl 'localhost:9200/_mget' -d '{
"_id" : "3",
"_source" : {
"include": ["user"],
"_exclude": ["user.location"]
"exclude": ["user.location"]
}
}
]
Expand Down
46 changes: 45 additions & 1 deletion docs/reference/query-dsl/queries/mlt-query.asciidoc
Expand Up @@ -16,6 +16,37 @@ running it against one or more fields.
}
--------------------------------------------------

Additionally, More Like This can find documents that are "like" a set of
chosen documents. The syntax to specify one or more documents is similar to
the <<docs-multi-get,Multi GET API>>, and supports the `ids` or `docs` array.
If only one document is specified, the query behaves the same as the
<<search-more-like-this,More Like This API>>.

[source,js]
--------------------------------------------------
{
"more_like_this" : {
"fields" : ["name.first", "name.last"],
"docs" : [
{
"_index" : "test",
"_type" : "type",
"_id" : "1"
},
{
"_index" : "test",
"_type" : "type",
"_id" : "2"
}
],
"ids" : ["3", "4"],
"min_term_freq" : 1,
"max_query_terms" : 12
}
}
--------------------------------------------------


`more_like_this` can be shortened to `mlt`.

Under the hood, `more_like_this` simply creates multiple `should` clauses in a `bool` query of
Expand All @@ -31,6 +62,10 @@ terms should be considered as interesting. In order to give more weight to
more interesting terms, each boolean clause associated with a term could be
boosted by the term tf-idf score times some boosting factor `boost_terms`.

When a search for multiple `docs` is issued, More Like This generates a
`more_like_this` query per document field in `fields`. These `fields` are
specified as a top level parameter or within each `doc`.

The `more_like_this` top level parameters include:

[cols="<,<",options="header",]
Expand All @@ -39,7 +74,16 @@ The `more_like_this` top level parameters include:
|`fields` |A list of the fields to run the more like this query against.
Defaults to the `_all` field.

|`like_text` |The text to find documents like it, *required*.
|`like_text` |The text to find documents like it, *required* if neither
`ids` nor `docs` is specified.

|`ids` or `docs` |A list of documents following the same syntax as the
<<docs-multi-get,Multi GET API>>. This parameter is *required* if
`like_text` is not specified. The texts are fetched from `fields` unless
specified in each `doc`, and cannot be set to `_all`.

|`exclude` |When using `ids`, specifies whether the documents should be
excluded from the search. Defaults to `true`.

|`percent_terms_to_match` |The percentage of terms to match on (float
value). Defaults to `0.3` (30 percent).
Expand Down
274 changes: 168 additions & 106 deletions src/main/java/org/elasticsearch/action/get/MultiGetRequest.java

Large diffs are not rendered by default.

Expand Up @@ -35,7 +35,9 @@
import org.elasticsearch.cluster.ClusterService;
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.cluster.routing.*;
import org.elasticsearch.cluster.routing.MutableShardRouting;
import org.elasticsearch.cluster.routing.ShardIterator;
import org.elasticsearch.cluster.routing.ShardRouting;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.engine.DocumentMissingException;
Expand Down
14 changes: 14 additions & 0 deletions src/main/java/org/elasticsearch/index/mapper/Uid.java
Expand Up @@ -21,6 +21,7 @@

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.UnicodeUtil;
import org.elasticsearch.action.get.MultiGetRequest;
import org.elasticsearch.common.lucene.BytesRefs;

import java.util.Collection;
Expand Down Expand Up @@ -94,6 +95,15 @@ public static Uid createUid(String uid) {
return new Uid(uid.substring(0, delimiterIndex), uid.substring(delimiterIndex + 1));
}

/**
 * Builds the uid of every given multi get item, in iteration order.
 *
 * @param items the items to build uids for
 * @return an array with one uid per item, at the same position as the item
 */
public static BytesRef[] createUids(List<MultiGetRequest.Item> items) {
    final BytesRef[] result = new BytesRef[items.size()];
    int position = 0;
    for (MultiGetRequest.Item current : items) {
        result[position] = createUidAsBytes(current);
        position++;
    }
    return result;
}

/**
 * Builds a uid from a type and an id given as strings, by wrapping both in a
 * {@link BytesRef} and delegating to the {@code BytesRef} based overload.
 *
 * @param type the document type
 * @param id   the document id
 * @return the uid as bytes
 */
public static BytesRef createUidAsBytes(String type, String id) {
    final BytesRef typeBytes = new BytesRef(type);
    final BytesRef idBytes = new BytesRef(id);
    return createUidAsBytes(typeBytes, idBytes);
}
Expand All @@ -102,6 +112,10 @@ public static BytesRef createUidAsBytes(String type, BytesRef id) {
return createUidAsBytes(new BytesRef(type), id);
}

/**
 * Builds the uid of a multi get item from the item's type and id.
 *
 * @param item the item to build the uid for
 * @return the uid as bytes
 */
public static BytesRef createUidAsBytes(MultiGetRequest.Item item) {
    final BytesRef uid = createUidAsBytes(item.type(), item.id());
    return uid;
}

public static BytesRef createUidAsBytes(BytesRef type, BytesRef id) {
final BytesRef ref = new BytesRef(type.length + 1 + id.length);
System.arraycopy(type.bytes, type.offset, ref.bytes, 0, type.length);
Expand Down
Expand Up @@ -20,19 +20,89 @@
package org.elasticsearch.index.query;

import org.elasticsearch.ElasticsearchIllegalArgumentException;
import org.elasticsearch.action.get.MultiGetRequest;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.lucene.uid.Versions;
import org.elasticsearch.common.xcontent.ToXContent;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.index.VersionType;
import org.elasticsearch.search.fetch.source.FetchSourceContext;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;

/**
* A more like this query that finds documents that are "like" the provided {@link #likeText(String)},
* or "like" the documents referenced through {@link #ids(String...)} and {@link #docs(Item...)},
* which is checked against the fields the query is constructed with.
*/
public class MoreLikeThisQueryBuilder extends BaseQueryBuilder implements BoostableQueryBuilder<MoreLikeThisQueryBuilder> {

/**
 * A single document reference used by the more like this query. All state is
 * inherited from the multi get item; this subclass only adds the ability to
 * render itself as XContent.
 */
public static final class Item extends MultiGetRequest.Item implements ToXContent {

    public Item() {
        super();
    }

    public Item(String index, @Nullable String type, String id) {
        super(index, type, id);
    }

    /**
     * Writes this item as one object. Properties that are unset (null), a
     * version equal to {@link Versions#MATCH_ANY} and a version type equal to
     * {@link VersionType#INTERNAL} are left out.
     *
     * @param builder the builder to write to
     * @param params  rendering parameters (not consulted here)
     * @return the builder, after the object has been closed
     * @throws IOException if the builder fails to write
     */
    @Override
    public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
        builder.startObject();

        if (index() != null) {
            builder.field("_index", index());
        }
        if (id() != null) {
            builder.field("_id", id());
        }
        if (type() != null) {
            builder.field("_type", type());
        }
        if (fields() != null) {
            builder.array("fields", fields());
        }
        if (routing() != null) {
            builder.field("_routing", routing());
        }
        if (fetchSourceContext() != null) {
            writeSourceFilter(builder, fetchSourceContext());
        }
        if (version() != Versions.MATCH_ANY) {
            builder.field("_version", version());
        }
        if (versionType() != VersionType.INTERNAL) {
            builder.field("_version_type", versionType().toString().toLowerCase(Locale.ROOT));
        }

        return builder.endObject();
    }

    /**
     * Writes the "_source" entry in one of three shapes: a boolean when there
     * are no include/exclude patterns, a plain array when there are only
     * includes, and an object with "includes"/"excludes" when excludes exist.
     */
    private static void writeSourceFilter(XContentBuilder builder, FetchSourceContext source) throws IOException {
        final String[] includes = source.includes();
        final String[] excludes = source.excludes();
        // Read both lengths before writing anything to the builder.
        final boolean hasIncludes = includes.length > 0;
        final boolean hasExcludes = excludes.length > 0;

        if (hasExcludes) {
            builder.startObject("_source");
            if (hasIncludes) {
                builder.array("includes", source.includes());
            }
            builder.array("excludes", source.excludes());
            builder.endObject();
        } else if (hasIncludes) {
            builder.array("_source", source.includes());
        } else {
            builder.field("_source", source.fetchSource());
        }
    }
}

private final String[] fields;

private String likeText;
private List<String> ids = new ArrayList<>();
private List<Item> docs = new ArrayList<>();
private Boolean exclude = null;
private float percentTermsToMatch = -1;
private int minTermFreq = -1;
private int maxQueryTerms = -1;
Expand Down Expand Up @@ -71,6 +141,26 @@ public MoreLikeThisQueryBuilder likeText(String likeText) {
return this;
}

/**
 * Sets the ids of the documents to find similar documents for, replacing any
 * ids set before.
 *
 * @param ids the document ids
 * @return this builder
 */
public MoreLikeThisQueryBuilder ids(String... ids) {
    final List<String> wrapped = Arrays.asList(ids);
    this.ids = wrapped;
    return this;
}

/**
 * Sets the documents to find similar documents for, replacing any documents
 * set or added before.
 *
 * @param docs the document references
 * @return this builder
 */
public MoreLikeThisQueryBuilder docs(Item... docs) {
    // Arrays.asList returns a fixed-size view of the array, so a later
    // addItem(...) would fail with UnsupportedOperationException. Copy into a
    // growable list instead; this also decouples the builder from the caller's array.
    this.docs = new ArrayList<>(Arrays.asList(docs));
    return this;
}

/**
 * Appends one document reference to the documents to find similar documents for.
 *
 * @param item the document reference to add
 * @return this builder
 */
public MoreLikeThisQueryBuilder addItem(Item item) {
    docs.add(item);
    return this;
}

/**
 * Sets the value of the "exclude" option rendered with this query.
 *
 * @param exclude the value to render for the "exclude" option
 * @return this builder
 */
public MoreLikeThisQueryBuilder exclude(boolean exclude) {
    this.exclude = Boolean.valueOf(exclude);
    return this;
}

/**
* The percentage of terms to match. Defaults to <tt>0.3</tt>.
*/
Expand Down Expand Up @@ -192,9 +282,9 @@ protected void doXContent(XContentBuilder builder, Params params) throws IOExcep
}
builder.endArray();
}
if (likeText == null) {
throw new ElasticsearchIllegalArgumentException("moreLikeThis requires '"+
MoreLikeThisQueryParser.Fields.LIKE_TEXT.getPreferredName() +"' to be provided");
if (likeText == null && this.docs.isEmpty() && this.ids.isEmpty()) {
throw new ElasticsearchIllegalArgumentException("more_like_this requires either '"+
MoreLikeThisQueryParser.Fields.LIKE_TEXT.getPreferredName() +"' or 'docs/ids' to be provided");
}
builder.field(MoreLikeThisQueryParser.Fields.LIKE_TEXT.getPreferredName(), likeText);
if (percentTermsToMatch != -1) {
Expand Down Expand Up @@ -240,6 +330,15 @@ protected void doXContent(XContentBuilder builder, Params params) throws IOExcep
if (queryName != null) {
builder.field("_name", queryName);
}
if (!ids.isEmpty()) {
builder.array("ids", ids.toArray());
}
if (!docs.isEmpty()) {
builder.array("docs", docs.toArray());
}
if (exclude != null) {
builder.field("exclude", exclude);
}
builder.endObject();
}
}

0 comments on commit 3d79986

Please sign in to comment.