Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Values of multi-value fields are compared at the same level #6310

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Expand Up @@ -21,7 +21,6 @@

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queries.mlt.MoreLikeThis;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
Expand All @@ -31,6 +30,7 @@
import org.elasticsearch.common.io.FastStringReader;

import java.io.IOException;
import java.io.Reader;
import java.util.Arrays;
import java.util.Set;

Expand All @@ -43,18 +43,18 @@ public class MoreLikeThisQuery extends Query {

private TFIDFSimilarity similarity;

private String likeText;
private String[] likeText;
private String[] moreLikeFields;
private Analyzer analyzer;
private float percentTermsToMatch = DEFAULT_PERCENT_TERMS_TO_MATCH;
private int minTermFrequency = MoreLikeThis.DEFAULT_MIN_TERM_FREQ;
private int maxQueryTerms = MoreLikeThis.DEFAULT_MAX_QUERY_TERMS;
private Set<?> stopWords = MoreLikeThis.DEFAULT_STOP_WORDS;
private int minDocFreq = MoreLikeThis.DEFAULT_MIN_DOC_FREQ;
private int maxDocFreq = MoreLikeThis.DEFAULT_MAX_DOC_FREQ;
private int minWordLen = MoreLikeThis.DEFAULT_MIN_WORD_LENGTH;
private int maxWordLen = MoreLikeThis.DEFAULT_MAX_WORD_LENGTH;
private boolean boostTerms = MoreLikeThis.DEFAULT_BOOST;
private int minTermFrequency = XMoreLikeThis.DEFAULT_MIN_TERM_FREQ;
private int maxQueryTerms = XMoreLikeThis.DEFAULT_MAX_QUERY_TERMS;
private Set<?> stopWords = XMoreLikeThis.DEFAULT_STOP_WORDS;
private int minDocFreq = XMoreLikeThis.DEFAULT_MIN_DOC_FREQ;
private int maxDocFreq = XMoreLikeThis.DEFAULT_MAX_DOC_FREQ;
private int minWordLen = XMoreLikeThis.DEFAULT_MIN_WORD_LENGTH;
private int maxWordLen = XMoreLikeThis.DEFAULT_MAX_WORD_LENGTH;
private boolean boostTerms = XMoreLikeThis.DEFAULT_BOOST;
private float boostTermsFactor = 1;


Expand All @@ -63,7 +63,7 @@ public MoreLikeThisQuery() {
}

public MoreLikeThisQuery(String likeText, String[] moreLikeFields, Analyzer analyzer) {
this.likeText = likeText;
this.likeText = new String[]{likeText};
this.moreLikeFields = moreLikeFields;
this.analyzer = analyzer;
}
Expand All @@ -72,7 +72,7 @@ public MoreLikeThisQuery(String likeText, String[] moreLikeFields, Analyzer anal
public int hashCode() {
int result = boostTerms ? 1 : 0;
result = 31 * result + Float.floatToIntBits(boostTermsFactor);
result = 31 * result + likeText.hashCode();
result = 31 * result + Arrays.hashCode(likeText);
result = 31 * result + maxDocFreq;
result = 31 * result + maxQueryTerms;
result = 31 * result + maxWordLen;
Expand All @@ -99,7 +99,7 @@ public boolean equals(Object obj) {
return false;
if (boostTermsFactor != other.boostTermsFactor)
return false;
if (!likeText.equals(other.likeText))
if (!(Arrays.equals(likeText, other.likeText)))
return false;
if (maxDocFreq != other.maxDocFreq)
return false;
Expand Down Expand Up @@ -132,7 +132,7 @@ public boolean equals(Object obj) {

@Override
public Query rewrite(IndexReader reader) throws IOException {
MoreLikeThis mlt = new MoreLikeThis(reader, similarity == null ? new DefaultSimilarity() : similarity);
XMoreLikeThis mlt = new XMoreLikeThis(reader, similarity == null ? new DefaultSimilarity() : similarity);

mlt.setFieldNames(moreLikeFields);
mlt.setAnalyzer(analyzer);
Expand All @@ -145,10 +145,15 @@ public Query rewrite(IndexReader reader) throws IOException {
mlt.setStopWords(stopWords);
mlt.setBoost(boostTerms);
mlt.setBoostFactor(boostTermsFactor);

Reader[] readers = new Reader[likeText.length];
for (int i = 0; i < readers.length; i++) {
readers[i] = new FastStringReader(likeText[i]);
}
//LUCENE 4 UPGRADE this maps the 3.6 behavior (only use the first field)
BooleanQuery bq = (BooleanQuery) mlt.like(new FastStringReader(likeText), moreLikeFields[0]);
BooleanClause[] clauses = bq.getClauses();
BooleanQuery bq = (BooleanQuery) mlt.like(moreLikeFields[0], readers);

BooleanClause[] clauses = bq.getClauses();
bq.setMinimumNumberShouldMatch((int) (clauses.length * percentTermsToMatch));

bq.setBoost(getBoost());
Expand All @@ -157,14 +162,22 @@ public Query rewrite(IndexReader reader) throws IOException {

@Override
public String toString(String field) {
return "like:" + likeText;
return "like:" + Arrays.toString(likeText);
}

/**
 * Returns the first "like" text held by this query.
 *
 * @return the first element of the like-text array, or {@code null} when the
 *         array is {@code null} or has no elements
 */
public String getLikeText() {
    // setLikeText(String...) stores whatever array it is given, including a
    // zero-length one, so the index must be guarded as well as the null case.
    if (likeText == null || likeText.length == 0) {
        return null;
    }
    return likeText[0];
}

/**
 * Returns all "like" texts held by this query.
 *
 * @return the internal like-text array itself (not a copy); may be
 *         {@code null} if no like text has been set
 */
public String[] getLikeTexts() {
    return this.likeText;
}

/**
 * Sets a single "like" text, replacing any previously stored texts.
 *
 * @param likeText the text to store; it is wrapped in a one-element array
 */
public void setLikeText(String likeText) {
    final String[] singleText = {likeText};
    this.likeText = singleText;
}

/**
 * Sets the "like" texts, replacing any previously stored texts.
 *
 * @param likeText the texts to store; the array is kept by reference, not copied
 */
public void setLikeText(String... likeText) {
this.likeText = likeText;
}

Expand Down