Skip to content

Commit

Permalink
More Like This Query: values of a multi-value field are compared at …
Browse files Browse the repository at this point in the history
…the same level.

Previously, More Like This would create a new mlt query for each value of a
multi-value field. This could result in all the values of the field being
selected, which defeats the purpose of More Like This. Instead, the correct
behavior is to generate only one mlt query for all the values of the field.
This commit provides the correct behavior for the More Like This DSL. The fix for
the More Like This API will be coming in another commit.

Closes #6310
  • Loading branch information
alexksikes committed Jun 3, 2014
1 parent a9acc59 commit d951825
Show file tree
Hide file tree
Showing 8 changed files with 1,075 additions and 44 deletions.
Expand Up @@ -21,7 +21,6 @@

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queries.mlt.MoreLikeThis;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
Expand All @@ -31,6 +30,7 @@
import org.elasticsearch.common.io.FastStringReader;

import java.io.IOException;
import java.io.Reader;
import java.util.Arrays;
import java.util.Set;

Expand All @@ -43,18 +43,18 @@ public class MoreLikeThisQuery extends Query {

private TFIDFSimilarity similarity;

private String likeText;
private String[] likeText;
private String[] moreLikeFields;
private Analyzer analyzer;
private float percentTermsToMatch = DEFAULT_PERCENT_TERMS_TO_MATCH;
private int minTermFrequency = MoreLikeThis.DEFAULT_MIN_TERM_FREQ;
private int maxQueryTerms = MoreLikeThis.DEFAULT_MAX_QUERY_TERMS;
private Set<?> stopWords = MoreLikeThis.DEFAULT_STOP_WORDS;
private int minDocFreq = MoreLikeThis.DEFAULT_MIN_DOC_FREQ;
private int maxDocFreq = MoreLikeThis.DEFAULT_MAX_DOC_FREQ;
private int minWordLen = MoreLikeThis.DEFAULT_MIN_WORD_LENGTH;
private int maxWordLen = MoreLikeThis.DEFAULT_MAX_WORD_LENGTH;
private boolean boostTerms = MoreLikeThis.DEFAULT_BOOST;
private int minTermFrequency = XMoreLikeThis.DEFAULT_MIN_TERM_FREQ;
private int maxQueryTerms = XMoreLikeThis.DEFAULT_MAX_QUERY_TERMS;
private Set<?> stopWords = XMoreLikeThis.DEFAULT_STOP_WORDS;
private int minDocFreq = XMoreLikeThis.DEFAULT_MIN_DOC_FREQ;
private int maxDocFreq = XMoreLikeThis.DEFAULT_MAX_DOC_FREQ;
private int minWordLen = XMoreLikeThis.DEFAULT_MIN_WORD_LENGTH;
private int maxWordLen = XMoreLikeThis.DEFAULT_MAX_WORD_LENGTH;
private boolean boostTerms = XMoreLikeThis.DEFAULT_BOOST;
private float boostTermsFactor = 1;


Expand All @@ -63,7 +63,7 @@ public MoreLikeThisQuery() {
}

/**
 * Creates a query and stores the given like text, field names and analyzer on it.
 *
 * @param likeText       the single text to find similar documents for
 * @param moreLikeFields the field names stored on the query
 * @param analyzer       the analyzer stored on the query
 */
public MoreLikeThisQuery(String likeText, String[] moreLikeFields, Analyzer analyzer) {
// NOTE(review): the next two assignments look like the before/after sides of the diff shown
// together; only the array-wrapping form fits a String[] likeText field -- confirm against
// the committed file.
this.likeText = likeText;
this.likeText = new String[]{likeText};
this.moreLikeFields = moreLikeFields;
this.analyzer = analyzer;
}
Expand All @@ -72,7 +72,7 @@ public MoreLikeThisQuery(String likeText, String[] moreLikeFields, Analyzer anal
public int hashCode() {
int result = boostTerms ? 1 : 0;
result = 31 * result + Float.floatToIntBits(boostTermsFactor);
result = 31 * result + likeText.hashCode();
result = 31 * result + Arrays.hashCode(likeText);
result = 31 * result + maxDocFreq;
result = 31 * result + maxQueryTerms;
result = 31 * result + maxWordLen;
Expand All @@ -99,7 +99,7 @@ public boolean equals(Object obj) {
return false;
if (boostTermsFactor != other.boostTermsFactor)
return false;
if (!likeText.equals(other.likeText))
if (!(Arrays.equals(likeText, other.likeText)))
return false;
if (maxDocFreq != other.maxDocFreq)
return false;
Expand Down Expand Up @@ -132,7 +132,7 @@ public boolean equals(Object obj) {

@Override
public Query rewrite(IndexReader reader) throws IOException {
MoreLikeThis mlt = new MoreLikeThis(reader, similarity == null ? new DefaultSimilarity() : similarity);
XMoreLikeThis mlt = new XMoreLikeThis(reader, similarity == null ? new DefaultSimilarity() : similarity);

mlt.setFieldNames(moreLikeFields);
mlt.setAnalyzer(analyzer);
Expand All @@ -145,10 +145,15 @@ public Query rewrite(IndexReader reader) throws IOException {
mlt.setStopWords(stopWords);
mlt.setBoost(boostTerms);
mlt.setBoostFactor(boostTermsFactor);

Reader[] readers = new Reader[likeText.length];
for (int i = 0; i < readers.length; i++) {
readers[i] = new FastStringReader(likeText[i]);
}
//LUCENE 4 UPGRADE this maps the 3.6 behavior (only use the first field)
BooleanQuery bq = (BooleanQuery) mlt.like(new FastStringReader(likeText), moreLikeFields[0]);
BooleanClause[] clauses = bq.getClauses();
BooleanQuery bq = (BooleanQuery) mlt.like(moreLikeFields[0], readers);

BooleanClause[] clauses = bq.getClauses();
bq.setMinimumNumberShouldMatch((int) (clauses.length * percentTermsToMatch));

bq.setBoost(getBoost());
Expand All @@ -157,14 +162,22 @@ public Query rewrite(IndexReader reader) throws IOException {

/**
 * Renders this query as the literal prefix "like:" followed by the like text.
 *
 * @param field not used by this implementation
 */
@Override
public String toString(String field) {
// NOTE(review): two return statements appear here; they look like the before/after sides of
// the diff shown together. Arrays.toString is the form that prints the elements of a
// String[] likeText -- confirm against the committed file.
return "like:" + likeText;
return "like:" + Arrays.toString(likeText);
}

/**
 * Returns the first of the like texts.
 *
 * @return the first like text, or {@code null} when no like text is set
 *         (the backing array is {@code null} or empty)
 */
public String getLikeText() {
    // setLikeText(String...) can be invoked with zero arguments, which stores an empty
    // array; indexing [0] on it would throw ArrayIndexOutOfBoundsException.
    if (likeText == null || likeText.length == 0) {
        return null;
    }
    return likeText[0];
}

/**
 * Returns all like texts.
 *
 * @return the backing array itself (no copy is made); may be {@code null}
 */
public String[] getLikeTexts() {
    return this.likeText;
}

/**
 * Replaces the like texts with a one-element array holding the given text.
 *
 * @param likeText the single text to store
 */
public void setLikeText(String likeText) {
    final String[] single = new String[1];
    single[0] = likeText;
    this.likeText = single;
}

/**
 * Replaces the like texts with the given values.
 *
 * @param likeText the texts to store; the array is kept as passed, without copying
 */
public void setLikeText(String... likeText) {
    final String[] texts = likeText;
    this.likeText = texts;
}

Expand Down

0 comments on commit d951825

Please sign in to comment.