Upgrade to lucene-5.2.0-snapshot-1673124. #10562

Merged: 2 commits (Apr 14, 2015)
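Most of the mechanical churn in this diff is the Lucene 5.2 TermsEnum API change: Terms.iterator(TermsEnum reuse) lost its reuse parameter. A minimal sketch of the migration pattern (illustrative only; `terms` is a hypothetical Terms instance, not code from this PR):

    // Lucene 5.1 and earlier: callers threaded a reuse instance through.
    //   TermsEnum termsEnum = terms.iterator(null);
    // Lucene 5.2: no reuse parameter; each call returns a fresh enum.
    TermsEnum termsEnum = terms.iterator();
    BytesRef term;
    while ((term = termsEnum.next()) != null) {
        // consume the term, e.g. term.utf8ToString()
    }

The same pattern repeats in every file below that previously passed null or a cached enum to iterator().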
6 changes: 3 additions & 3 deletions pom.xml
@@ -31,8 +31,8 @@
</parent>

<properties>
- <lucene.version>5.1.0</lucene.version>
- <lucene.maven.version>5.1.0-snapshot-1671894</lucene.maven.version>
+ <lucene.version>5.2.0</lucene.version>
+ <lucene.maven.version>5.2.0-snapshot-1673124</lucene.maven.version>
<tests.jvms>auto</tests.jvms>
<tests.shuffle>true</tests.shuffle>
<tests.output>onerror</tests.output>
@@ -66,7 +66,7 @@
<repository>
<id>lucene-snapshots</id>
<name>Lucene Snapshots</name>
- <url>https://download.elastic.co/lucenesnapshots/1671894</url>
+ <url>https://download.elastic.co/lucenesnapshots/1673124</url>
</repository>
</repositories>

@@ -28,6 +28,7 @@
import org.apache.lucene.util.UnicodeUtil;

import java.io.IOException;
+ import java.nio.charset.StandardCharsets;
import java.text.BreakIterator;
import java.util.*;

@@ -426,7 +427,7 @@ protected Map<Integer,Object> highlightField(String field, String contents[], Br
throw new IllegalArgumentException("field '" + field + "' was indexed without offsets, cannot highlight");
}
if (leaf != lastLeaf) {
- termsEnum = t.iterator(null);
+ termsEnum = t.iterator();
postings = new PostingsEnum[terms.length];
}
Passage passages[] = highlightDoc(field, terms, content.length(), bi, doc - subContext.docBase, termsEnum, postings, maxPassages);
@@ -745,7 +746,8 @@ public LimitedStoredFieldVisitor(String fields[], char valueSeparators[], int ma
}

@Override
- public void stringField(FieldInfo fieldInfo, String value) throws IOException {
+ public void stringField(FieldInfo fieldInfo, byte[] bytes) throws IOException {
+   String value = new String(bytes, StandardCharsets.UTF_8);
assert currentField >= 0;
StringBuilder builder = builders[currentField];
if (builder.length() > 0 && builder.length() < maxLength) {
2 changes: 1 addition & 1 deletion src/main/java/org/elasticsearch/Version.java
@@ -236,7 +236,7 @@ public class Version {
public static final int V_1_6_0_ID = 1060099;
public static final Version V_1_6_0 = new Version(V_1_6_0_ID, true, org.apache.lucene.util.Version.LUCENE_4_10_4);
public static final int V_2_0_0_ID = 2000099;
- public static final Version V_2_0_0 = new Version(V_2_0_0_ID, true, org.apache.lucene.util.Version.LUCENE_5_1_0);
+ public static final Version V_2_0_0 = new Version(V_2_0_0_ID, true, org.apache.lucene.util.Version.LUCENE_5_2_0);

public static final Version CURRENT = V_2_0_0;

@@ -212,7 +212,7 @@ private void reset() throws IOException {
}

@Override
- public TermsEnum iterator(TermsEnum reuse) throws IOException {
+ public TermsEnum iterator() throws IOException {
// reset before asking for an iterator
reset();
// convert bytes ref for the terms to actual data
@@ -201,7 +201,7 @@ private void buildField(XContentBuilder builder, final CharsRefBuilder spare, Fi
// write field statistics
buildFieldStatistics(builder, curTerms);
builder.startObject(FieldStrings.TERMS);
- TermsEnum termIter = curTerms.iterator(null);
+ TermsEnum termIter = curTerms.iterator();
for (int i = 0; i < curTerms.size(); i++) {
buildTerm(builder, spare, curTerms, termIter);
}
@@ -52,10 +52,8 @@ final class TermVectorsWriter {

void setFields(Fields termVectorsByField, Set<String> selectedFields, EnumSet<Flag> flags, Fields topLevelFields, @Nullable AggregatedDfs dfs) throws IOException {
int numFieldsWritten = 0;
- TermsEnum iterator = null;
PostingsEnum docsAndPosEnum = null;
PostingsEnum docsEnum = null;
- TermsEnum topLevelIterator = null;
for (String field : termVectorsByField) {
if ((selectedFields != null) && (!selectedFields.contains(field))) {
continue;
@@ -69,7 +67,7 @@ void setFields(Fields termVectorsByField, Set<String> selectedFields, EnumSet<Fl
topLevelTerms = fieldTermVector;
}

- topLevelIterator = topLevelTerms.iterator(topLevelIterator);
+ TermsEnum topLevelIterator = topLevelTerms.iterator();
boolean positions = flags.contains(Flag.Positions) && fieldTermVector.hasPositions();
boolean offsets = flags.contains(Flag.Offsets) && fieldTermVector.hasOffsets();
boolean payloads = flags.contains(Flag.Payloads) && fieldTermVector.hasPayloads();
@@ -81,7 +79,7 @@ void setFields(Fields termVectorsByField, Set<String> selectedFields, EnumSet<Fl
writeFieldStatistics(topLevelTerms);
}
}
- iterator = fieldTermVector.iterator(iterator);
+ TermsEnum iterator = fieldTermVector.iterator();
final boolean useDocsAndPos = positions || offsets || payloads;
while (iterator.next() != null) { // iterate all terms of the
// current field
@@ -53,13 +53,12 @@ public DfsOnlyRequest(Fields termVectorsFields, String[] indices, String[] types

// build a search request with a query of all the terms
final BoolQueryBuilder boolBuilder = boolQuery();
- TermsEnum iterator = null;
for (String fieldName : termVectorsFields) {
if ((selectedFields != null) && (!selectedFields.contains(fieldName))) {
continue;
}
Terms terms = termVectorsFields.terms(fieldName);
- iterator = terms.iterator(iterator);
+ TermsEnum iterator = terms.iterator();
while (iterator.next() != null) {
String text = iterator.term().utf8ToString();
boolBuilder.should(QueryBuilders.termQuery(fieldName, text));
50 changes: 33 additions & 17 deletions src/main/java/org/elasticsearch/common/lucene/all/AllTermQuery.java
@@ -25,12 +25,12 @@
import org.apache.lucene.search.ComplexExplanation;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
- import org.apache.lucene.search.Weight;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.search.spans.SpanScorer;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.search.spans.SpanWeight;
+ import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.search.spans.TermSpans;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
@@ -51,7 +51,7 @@ public AllTermQuery(Term term) {
}

@Override
- public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
+ public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
Comment (Contributor):
This class overrides weight/scorer; where is its positions check (throw IAE if proximity is not enabled)?

Comment (Contributor, PR author):
We need to fix it in Lucene too, then.

Comment (Contributor):
Maybe you are a bit behind. I added tests for this position check last night and fixed bugs in the default impl.
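(For context, a rough sketch of the positions check being asked about; illustrative only, not code from this PR. `context` is the LeafReaderContext and `term` the query's Term, both assumed in scope:)

    // Fail fast if the field was indexed without positions.
    Terms terms = context.reader().terms(term.field());
    if (terms != null && terms.hasPositions() == false) {
        throw new IllegalArgumentException("field \"" + term.field()
                + "\" was indexed without positions, cannot use AllTermQuery");
    }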

// TODO: needsScores
// we should be able to just return a regular SpanTermWeight, at most here if needsScores == false?
return new AllTermWeight(this, searcher);
@@ -68,40 +68,56 @@ public AllTermSpanScorer scorer(LeafReaderContext context, Bits acceptDocs) thro
if (this.stats == null) {
return null;
}
+ TermSpans spans = (TermSpans) query.getSpans(context, acceptDocs, termContexts);
+ if (spans == null) {
+   return null;
+ }
SimScorer sloppySimScorer = similarity.simScorer(stats, context);
- return new AllTermSpanScorer((TermSpans) query.getSpans(context, acceptDocs, termContexts), this, sloppySimScorer);
+ return new AllTermSpanScorer(spans, this, sloppySimScorer);
}

protected class AllTermSpanScorer extends SpanScorer {
protected PostingsEnum positions;
protected float payloadScore;
protected int payloadsSeen;

- public AllTermSpanScorer(TermSpans spans, Weight weight, Similarity.SimScorer docScorer) throws IOException {
+ public AllTermSpanScorer(TermSpans spans, SpanWeight weight, Similarity.SimScorer docScorer) throws IOException {
super(spans, weight, docScorer);
positions = spans.getPostings();
}

@Override
- protected boolean setFreqCurrentDoc() throws IOException {
-   if (!more) {
-     return false;
-   }
-   doc = spans.doc();
+ protected void setFreqCurrentDoc() throws IOException {
freq = 0.0f;
numMatches = 0;
payloadScore = 0;
payloadsSeen = 0;
- do {
-   int matchLength = spans.end() - spans.start();
-
-   freq += docScorer.computeSlopFactor(matchLength);
+ assert spans.startPosition() == -1 : "incorrect initial start position, spans="+spans;
Comment (Contributor):
I think this is fine for now, but note this stuff got excessive in all the spans and still would not find bugs. In order to make things debuggable I had to solve it another way: https://issues.apache.org/jira/browse/LUCENE-6411
So I am not sure about all the asserts; to me, I just get lost in the code.

Comment (Contributor, PR author):
I did not want to do anything smart; this just copies over logic from SpanScorer.

Comment (Contributor):
Why do we need the copy-paste at all? This whole thing seems like a code duplication of PayloadTermQuery.

Comment (Contributor):
And I just did not have the time yet, since yesterday, to remove the stupid asserts from SpanScorer. Please, let's not drag this stuff in again. If you want to push, fine, but you will see a second push from me removing all this crap.

Comment (Contributor, PR author):
The duplication comes from the fact that AllTermScorer needs to process the payload in the middle of the setFreqCurrentDoc loop.

Comment (Contributor):
That is just what PayloadTermQuery does, too. So again, how is this different from PayloadTermScorer? Even the variable/method names are similar.
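(For comparison, the shape of Lucene's PayloadTermQuery scorer loop before the 5.2 Spans change, sketched from memory rather than verbatim; it likewise consumes the payload in the middle of setFreqCurrentDoc:)

    protected boolean setFreqCurrentDoc() throws IOException {
      if (!more) {
        return false;
      }
      doc = spans.doc();
      freq = 0.0f;
      payloadScore = 0;
      payloadsSeen = 0;
      do {
        int matchLength = spans.end() - spans.start();
        freq += docScorer.computeSlopFactor(matchLength);
        processPayload(similarity);  // payload handled mid-loop, as in AllTermQuery
        more = spans.next();         // advance to the next match
      } while (more && (doc == spans.doc()));
      return true;
    }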

+ assert spans.endPosition() == -1 : "incorrect initial end position, spans="+spans;
+ int prevStartPos = -1;
+ int prevEndPos = -1;
+
+ int startPos = spans.nextStartPosition();
+ assert startPos != Spans.NO_MORE_POSITIONS : "initial startPos NO_MORE_POSITIONS, spans="+spans;
+ do {
+   assert startPos >= prevStartPos;
+   int endPos = spans.endPosition();
+   assert endPos != Spans.NO_MORE_POSITIONS;
+   // This assertion can fail for Or spans on the same term:
+   // assert (startPos != prevStartPos) || (endPos > prevEndPos) : "non increased endPos="+endPos;
+   assert (startPos != prevStartPos) || (endPos >= prevEndPos) : "decreased endPos="+endPos;
+   numMatches++;
+   int matchLength = endPos - startPos;
+   freq += docScorer.computeSlopFactor(matchLength);
+   processPayload();
+   prevStartPos = startPos;
+   prevEndPos = endPos;
+   startPos = spans.nextStartPosition();
+ } while (startPos != Spans.NO_MORE_POSITIONS);
+
- more = spans.next();// this moves positions to the next match
- } while (more && (doc == spans.doc()));
- return true;
+ assert spans.startPosition() == Spans.NO_MORE_POSITIONS : "incorrect final start position, spans="+spans;
+ assert spans.endPosition() == Spans.NO_MORE_POSITIONS : "incorrect final end position, spans="+spans;
}

protected void processPayload() throws IOException {
@@ -120,7 +136,7 @@ protected void processPayload() throws IOException {
* @throws IOException
*/
@Override
- public float score() throws IOException {
+ public float scoreCurrentDoc() throws IOException {
return getSpanScore() * getPayloadScore();
}

@@ -134,7 +150,7 @@ public float score() throws IOException {
* @see #score()
*/
protected float getSpanScore() throws IOException {
- return super.score();
+ return super.scoreCurrentDoc();
}

/**
@@ -84,7 +84,7 @@ public FilterableTermsEnum(IndexReader reader, String field, int docsEnumFlag, @
if (terms == null) {
continue;
}
- TermsEnum termsEnum = terms.iterator(null);
+ TermsEnum termsEnum = terms.iterator();
if (termsEnum == null) {
continue;
}
@@ -202,7 +202,7 @@ private void handleSkipTerms(XMoreLikeThis mlt, String[] ignoreText, Fields[] ig
for (Fields fields : ignoreFields) {
for (String fieldName : fields) {
Terms terms = fields.terms(fieldName);
- final TermsEnum termsEnum = terms.iterator(null);
+ final TermsEnum termsEnum = terms.iterator();
BytesRef text;
while ((text = termsEnum.next()) != null) {
skipTerms.add(new Term(fieldName, text.utf8ToString()));
@@ -155,15 +155,14 @@ public Query rewrite(IndexReader reader) throws IOException {
private void getPrefixTerms(ObjectOpenHashSet<Term> terms, final Term prefix, final IndexReader reader) throws IOException {
// SlowCompositeReaderWrapper could be used... but this would merge all terms from each segment into one terms
// instance, which is very expensive. Therefore I think it is better to iterate over each leaf individually.
- TermsEnum termsEnum = null;
List<LeafReaderContext> leaves = reader.leaves();
for (LeafReaderContext leaf : leaves) {
Terms _terms = leaf.reader().terms(field);
if (_terms == null) {
continue;
}

- termsEnum = _terms.iterator(termsEnum);
+ TermsEnum termsEnum = _terms.iterator();
TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(prefix.bytes());
if (TermsEnum.SeekStatus.END == seekStatus) {
continue;
@@ -842,7 +842,7 @@ private void addTermFrequencies(Map<String, Int> termFreqMap, Terms vector) thro
* @param fieldName Optional field name of the terms for skip terms
*/
private void addTermFrequencies(Map<String, Int> termFreqMap, Terms vector, @Nullable String fieldName) throws IOException {
- final TermsEnum termsEnum = vector.iterator(null);
+ final TermsEnum termsEnum = vector.iterator();
final CharsRefBuilder spare = new CharsRefBuilder();
BytesRef text;
while((text = termsEnum.next()) != null) {
@@ -82,7 +82,7 @@ public PerThreadIDAndVersionLookup(IndexReader r) throws IOException {
if (terms != null) {
readerContexts[numSegs] = readerContext;
hasPayloads[numSegs] = terms.hasPayloads();
- termsEnums[numSegs] = terms.iterator(null);
+ termsEnums[numSegs] = terms.iterator();
assert termsEnums[numSegs] != null;
liveDocs[numSegs] = readerContext.reader().getLiveDocs();
hasDeletions |= readerContext.reader().hasDeletions();
@@ -239,40 +239,23 @@ public BloomFilter getFilter() {
}

@Override
- public TermsEnum iterator(TermsEnum reuse) throws IOException {
-   TermsEnum result;
-   if ((reuse != null) && (reuse instanceof BloomFilteredTermsEnum)) {
-     // recycle the existing BloomFilteredTermsEnum by asking the delegate
-     // to recycle its contained TermsEnum
-     BloomFilteredTermsEnum bfte = (BloomFilteredTermsEnum) reuse;
-     if (bfte.filter == filter) {
-       bfte.reset(this.in);
-       return bfte;
-     }
-     reuse = bfte.reuse;
-   }
-   // We have been handed something we cannot reuse (either null, wrong
-   // class or wrong filter) so allocate a new object
-   result = new BloomFilteredTermsEnum(this.in, reuse, filter);
-   return result;
+ public TermsEnum iterator() throws IOException {
+   return new BloomFilteredTermsEnum(this.in, filter);
}
}

static final class BloomFilteredTermsEnum extends TermsEnum {

private Terms delegateTerms;
private TermsEnum delegateTermsEnum;
- private TermsEnum reuse;
private BloomFilter filter;

- public BloomFilteredTermsEnum(Terms other, TermsEnum reuse, BloomFilter filter) {
+ public BloomFilteredTermsEnum(Terms other, BloomFilter filter) {
this.delegateTerms = other;
- this.reuse = reuse;
this.filter = filter;
}

void reset(Terms others) {
- reuse = this.delegateTermsEnum;
this.delegateTermsEnum = null;
this.delegateTerms = others;
}
@@ -283,7 +266,7 @@ private TermsEnum getDelegate() throws IOException {
* this can be a relatively heavy operation depending on the
* delegate postings format and they underlying directory
* (clone IndexInput) */
- delegateTermsEnum = delegateTerms.iterator(reuse);
+ delegateTermsEnum = delegateTerms.iterator();
}
return delegateTermsEnum;
}
@@ -385,7 +368,7 @@ public void write(Fields fields) throws IOException {
continue;
}
FieldInfo fieldInfo = state.fieldInfos.fieldInfo(field);
- TermsEnum termsEnum = terms.iterator(null);
+ TermsEnum termsEnum = terms.iterator();

BloomFilter bloomFilter = null;

@@ -39,7 +39,7 @@ public EngineSearcherFactory(EngineConfig engineConfig) {
}

@Override
- public IndexSearcher newSearcher(IndexReader reader) throws IOException {
+ public IndexSearcher newSearcher(IndexReader reader, IndexReader previousReader) throws IOException {
IndexSearcher searcher = new IndexSearcher(reader);
searcher.setSimilarity(engineConfig.getSimilarity());
return searcher;
@@ -1030,7 +1030,7 @@ class SearchFactory extends EngineSearcherFactory {
}

@Override
- public IndexSearcher newSearcher(IndexReader reader) throws IOException {
+ public IndexSearcher newSearcher(IndexReader reader, IndexReader previousReader) throws IOException {
IndexSearcher searcher = new IndexSearcher(reader);
searcher.setSimilarity(engineConfig.getSimilarity());
if (warmer != null) {
@@ -81,7 +81,7 @@ public IndexOrdinalsFieldData localGlobalDirect(IndexReader indexReader) throws
}

protected TermsEnum filter(Terms terms, LeafReader reader) throws IOException {
- TermsEnum iterator = terms.iterator(null);
+ TermsEnum iterator = terms.iterator();
if (iterator == null) {
return null;
}
@@ -108,7 +108,7 @@ public AtomicNumericFieldData loadDirect(LeafReaderContext context) throws Excep
final float acceptableTransientOverheadRatio = fieldDataType.getSettings().getAsFloat("acceptable_transient_overhead_ratio", OrdinalsBuilder.DEFAULT_ACCEPTABLE_OVERHEAD_RATIO);
boolean success = false;
try (OrdinalsBuilder builder = new OrdinalsBuilder(reader.maxDoc(), acceptableTransientOverheadRatio)) {
- final BytesRefIterator iter = builder.buildFromTerms(getNumericType().wrapTermsEnum(terms.iterator(null)));
+ final BytesRefIterator iter = builder.buildFromTerms(getNumericType().wrapTermsEnum(terms.iterator()));
BytesRef term;
long numTerms = 0;
while ((term = iter.next()) != null) {
@@ -106,7 +106,7 @@ public AtomicNumericFieldData loadDirect(LeafReaderContext context) throws Excep
final float acceptableTransientOverheadRatio = fieldDataType.getSettings().getAsFloat("acceptable_transient_overhead_ratio", OrdinalsBuilder.DEFAULT_ACCEPTABLE_OVERHEAD_RATIO);
boolean success = false;
try (OrdinalsBuilder builder = new OrdinalsBuilder(reader.maxDoc(), acceptableTransientOverheadRatio)) {
- BytesRefIterator iter = builder.buildFromTerms(getNumericType().wrapTermsEnum(terms.iterator(null)));
+ BytesRefIterator iter = builder.buildFromTerms(getNumericType().wrapTermsEnum(terms.iterator()));
BytesRef term;
long numTerms = 0;
while ((term = iter.next()) != null) {