Skip to content

Commit

Permalink
Added the ability to include the queried document for More Like This …
Browse files Browse the repository at this point in the history
…API.

By default More Like This API excludes the queried document from the response.
However, when debugging or when comparing scores across different queries, it
could be useful to have the best possible matched hit. So this option lets users
explicitly specify the desired behavior.

Closes #6067
  • Loading branch information
alexksikes committed May 9, 2014
1 parent 59d8256 commit 0c2069a
Show file tree
Hide file tree
Showing 6 changed files with 79 additions and 3 deletions.
3 changes: 3 additions & 0 deletions docs/reference/search/more-like-this.asciidoc
Expand Up @@ -25,5 +25,8 @@ Rest parameters relating to search are also allowed, including
When no `mlt_fields` are specified, all the fields of the document will
be used in the `more_like_this` query generated.

By default, the queried document is excluded from the response (`include`
defaults to `false`). Set `include` to `true` to have it returned as well.

Note: In order to use the `mlt` feature, a `mlt_field` needs to either be
`stored` or store `term_vector`, or `source` needs to be enabled.
Expand Up @@ -21,6 +21,7 @@

import org.elasticsearch.ElasticsearchGenerationException;
import org.elasticsearch.ElasticsearchIllegalArgumentException;
import org.elasticsearch.Version;
import org.elasticsearch.action.ActionRequest;
import org.elasticsearch.action.ActionRequestValidationException;
import org.elasticsearch.action.ValidateActions;
Expand Down Expand Up @@ -72,6 +73,7 @@ public class MoreLikeThisRequest extends ActionRequest<MoreLikeThisRequest> {
private int minWordLength = -1;
private int maxWordLength = -1;
private float boostTerms = -1;
private boolean include = false;

private SearchType searchType = SearchType.DEFAULT;
private int searchSize = 0;
Expand Down Expand Up @@ -313,6 +315,21 @@ public float boostTerms() {
return this.boostTerms;
}

/**
 * Sets whether the queried document itself should be included in the
 * response. Defaults to <tt>false</tt>.
 *
 * @param include the new value of the include flag
 * @return this request, so that calls can be chained
 */
public MoreLikeThisRequest include(boolean include) {
this.include = include;
return this;
}

/**
 * Returns whether the queried document is to be included in the response.
 * Defaults to <tt>false</tt>.
 *
 * @return the current value of the include flag
 */
public boolean include() {
return this.include;
}

void beforeLocalFork() {
if (searchSourceUnsafe) {
searchSource = searchSource.copyBytesArray();
Expand Down Expand Up @@ -553,6 +570,12 @@ public void readFrom(StreamInput in) throws IOException {
minWordLength = in.readVInt();
maxWordLength = in.readVInt();
boostTerms = in.readFloat();
if (in.getVersion().onOrAfter(Version.V_1_2_0)) {
include = in.readBoolean();
} else {
include = false; // hard-coded behavior until Elasticsearch 1.2
}

searchType = SearchType.fromId(in.readByte());
if (in.readBoolean()) {
searchQueryHint = in.readString();
Expand Down Expand Up @@ -622,6 +645,9 @@ public void writeTo(StreamOutput out) throws IOException {
out.writeVInt(minWordLength);
out.writeVInt(maxWordLength);
out.writeFloat(boostTerms);
if (out.getVersion().onOrAfter(Version.V_1_2_0)) {
out.writeBoolean(include);
}

out.writeByte(searchType.id());
if (searchQueryHint == null) {
Expand Down
Expand Up @@ -140,6 +140,14 @@ public MoreLikeThisRequestBuilder setBoostTerms(float boostTerms) {
return this;
}

/**
 * Sets whether the queried document should be included. Defaults to <tt>false</tt>.
 * The value is forwarded unchanged to the wrapped request's <tt>include</tt> setter.
 *
 * @param include the value passed on to the request
 * @return this builder, so that calls can be chained
 */
public MoreLikeThisRequestBuilder setInclude(boolean include) {
request.include(include);
return this;
}

/**
* An optional search source request allowing to control the search request for the
* more like this documents.
Expand Down
Expand Up @@ -178,9 +178,11 @@ public void onResponse(GetResponse getResponse) {
}

// exclude myself
Term uidTerm = docMapper.uidMapper().term(request.type(), request.id());
boolBuilder.mustNot(termQuery(uidTerm.field(), uidTerm.text()));
boolBuilder.adjustPureNegative(false);
if (!request.include()) {
Term uidTerm = docMapper.uidMapper().term(request.type(), request.id());
boolBuilder.mustNot(termQuery(uidTerm.field(), uidTerm.text()));
boolBuilder.adjustPureNegative(false);
}
} catch (Throwable e) {
listener.onFailure(e);
return;
Expand Down
Expand Up @@ -70,6 +70,7 @@ public void handleRequest(final RestRequest request, final RestChannel channel)
mltRequest.minWordLength(request.paramAsInt("min_word_len", request.paramAsInt("min_word_length", -1)));
mltRequest.maxWordLength(request.paramAsInt("max_word_len", request.paramAsInt("max_word_length", -1)));
mltRequest.boostTerms(request.paramAsFloat("boost_terms", -1));
mltRequest.include(request.paramAsBoolean("include", false));

mltRequest.searchType(SearchType.fromString(request.param("search_type")));
mltRequest.searchIndices(request.paramAsStringArray("search_indices", null));
Expand Down
36 changes: 36 additions & 0 deletions src/test/java/org/elasticsearch/mlt/MoreLikeThisActionTests.java
Expand Up @@ -252,4 +252,40 @@ public void testNumericField() throws Exception {
assertHitCount(searchResponse, 0l);
}

/**
 * Checks that the <tt>include</tt> flag of a More Like This request controls
 * whether the queried document shows up among the hits.
 * <p>
 * Two documents sharing the term "Lucene" are indexed. The test then queries
 * for each of them with <tt>include(true)</tt>, and finally for document 1
 * without setting the flag.
 */
@Test
public void testSimpleMoreLikeInclude() throws Exception {
logger.info("Creating index test");
// Mapping for type1 with a single string field named "text".
assertAcked(prepareCreate("test").addMapping("type1",
jsonBuilder().startObject().startObject("type1").startObject("properties")
.startObject("text").field("type", "string").endObject()
.endObject().endObject().endObject()));

logger.info("Running Cluster Health");
assertThat(ensureGreen(), equalTo(ClusterHealthStatus.GREEN));

logger.info("Indexing...");
client().index(indexRequest("test").type("type1").id("1").source(
jsonBuilder().startObject()
.field("text", "Apache Lucene is a free/open source information retrieval software library").endObject()))
.actionGet();
client().index(indexRequest("test").type("type1").id("2").source(
jsonBuilder().startObject()
.field("text", "Lucene has been ported to other programming languages").endObject()))
.actionGet();
// Refresh before searching; presumably this makes both documents visible to the queries below.
client().admin().indices().refresh(refreshRequest()).actionGet();

logger.info("Running More Like This with include true");
// Querying for document 1 with include(true): document 1 itself is expected first, then document 2.
SearchResponse mltResponse = client().moreLikeThis(
moreLikeThisRequest("test").type("type1").id("1").minTermFreq(1).minDocFreq(1).include(true)).actionGet();
assertOrderedSearchHits(mltResponse, "1", "2");

// Same check from the other side: document 2 is queried and expected first.
mltResponse = client().moreLikeThis(
moreLikeThisRequest("test").type("type1").id("2").minTermFreq(1).minDocFreq(1).include(true)).actionGet();
assertOrderedSearchHits(mltResponse, "2", "1");

logger.info("Running More Like This with include false");
// include(...) is not called here, so the request's default applies; only document 2 is expected back.
mltResponse = client().moreLikeThis(moreLikeThisRequest("test").type("type1").id("1").minTermFreq(1).minDocFreq(1)).actionGet();
assertSearchHits(mltResponse, "2");
}

}

0 comments on commit 0c2069a

Please sign in to comment.