Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use segment ordinals as global ordinals if possible #5873

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Expand Up @@ -127,8 +127,12 @@ private Atomic(WithOrdinals afd, LongValues globalOrdToFirstSegment, LongValues
public BytesValues.WithOrdinals getBytesValues(boolean needsHashes) {
BytesValues.WithOrdinals values = afd.getBytesValues(false);
Ordinals.Docs segmentOrdinals = values.ordinals();
Ordinals.Docs globalOrdinals = segmentOrdToGlobalOrdLookup.globalOrdinals(segmentOrdinals);

final Ordinals.Docs globalOrdinals;
if (segmentOrdToGlobalOrdLookup != null) {
globalOrdinals = segmentOrdToGlobalOrdLookup.globalOrdinals(segmentOrdinals);
} else {
globalOrdinals = segmentOrdinals;
}
final BytesValues.WithOrdinals[] bytesValues = new BytesValues.WithOrdinals[atomicReaders.length];
for (int i = 0; i < bytesValues.length; i++) {
bytesValues[i] = atomicReaders[i].afd.getBytesValues(false);
Expand Down
Expand Up @@ -105,7 +105,8 @@ public IndexFieldData.WithOrdinals build(final IndexReader indexReader, IndexFie
breakerService.getBreaker().addWithoutBreaking(memorySizeInBytes);

if (logger.isDebugEnabled()) {
String implName = segmentOrdToGlobalOrdLookups[0].getClass().getSimpleName();
// this does include the [] from the array in the impl name
String implName = segmentOrdToGlobalOrdLookups.getClass().getSimpleName();
logger.debug(
"Global-ordinals[{}][{}][{}] took {} ms",
implName,
Expand Down Expand Up @@ -280,19 +281,31 @@ public OrdinalMappingSource[] build(long maxOrd) {
PackedIntOrdinalMappingSource[] sources = new PackedIntOrdinalMappingSource[numSegments];
for (int i = 0; i < newSegmentOrdToGlobalOrdDeltas.length; i++) {
PackedInts.Reader segmentOrdToGlobalOrdDelta = newSegmentOrdToGlobalOrdDeltas[i];
long ramUsed = segmentOrdToGlobalOrdDelta.ramBytesUsed();
sources[i] = new PackedIntOrdinalMappingSource(segmentOrdToGlobalOrdDelta, ramUsed, maxOrd);
memorySizeInBytesCounter += ramUsed;
if (segmentOrdToGlobalOrdDelta.size() == maxOrd) {
// This means that a segment contains all the value and in that case segment ordinals
// can be used as global ordinals. This will save an extra lookup per hit.
sources[i] = null;
} else {
long ramUsed = segmentOrdToGlobalOrdDelta.ramBytesUsed();
sources[i] = new PackedIntOrdinalMappingSource(segmentOrdToGlobalOrdDelta, ramUsed, maxOrd);
memorySizeInBytesCounter += ramUsed;
}

}
return sources;
} else {
OrdinalMappingSource[] sources = new OrdinalMappingSource[segmentOrdToGlobalOrdDeltas.length];
for (int i = 0; i < segmentOrdToGlobalOrdDeltas.length; i++) {
MonotonicAppendingLongBuffer segmentOrdToGlobalOrdLookup = segmentOrdToGlobalOrdDeltas[i];
segmentOrdToGlobalOrdLookup.freeze();
long ramUsed = segmentOrdToGlobalOrdLookup.ramBytesUsed();
sources[i] = new CompressedOrdinalMappingSource(segmentOrdToGlobalOrdLookup, ramUsed, maxOrd);
memorySizeInBytesCounter += ramUsed;
if (segmentOrdToGlobalOrdLookup.size() == maxOrd) {
// idem as above
sources[i] = null;
} else {
segmentOrdToGlobalOrdLookup.freeze();
long ramUsed = segmentOrdToGlobalOrdLookup.ramBytesUsed();
sources[i] = new CompressedOrdinalMappingSource(segmentOrdToGlobalOrdLookup, ramUsed, maxOrd);
memorySizeInBytesCounter += ramUsed;
}
}
return sources;
}
Expand Down
Expand Up @@ -71,6 +71,23 @@ protected final void collectBucket(int doc, long bucketOrd) throws IOException {
}
}

/**
* Same as {@link #collectBucket(int, long)}, but doesn't check if the docCounts needs to be re-sized.
*/
protected final void collectExistingBucket(int doc, long bucketOrd) throws IOException {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we have collectBucket call this method to avoid duplication?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sure!

docCounts.increment(bucketOrd, 1);
for (int i = 0; i < collectableSugAggregators.length; i++) {
collectableSugAggregators[i].collect(doc, bucketOrd);
}
}

/**
* Initializes the docCounts to the specified size.
*/
public void initializeDocCounts(long maxOrd) {
docCounts = bigArrays.grow(docCounts, maxOrd);
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it necessary? the aggregator factory should create it with the right size directly? (if it does not I think this is a bug)


/**
* Utility method to collect the given doc in the given bucket but not to update the doc counts of the bucket
*/
Expand Down
Expand Up @@ -69,14 +69,15 @@ public boolean shouldCollect() {
public void setNextReader(AtomicReaderContext reader) {
globalValues = valuesSource.globalBytesValues();
globalOrdinals = globalValues.ordinals();
initializeDocCounts(globalOrdinals.getMaxOrd());
}

@Override
public void collect(int doc, long owningBucketOrdinal) throws IOException {
final int numOrds = globalOrdinals.setDocument(doc);
for (int i = 0; i < numOrds; i++) {
final long globalOrd = globalOrdinals.nextOrd();
collectBucket(doc, createBucketOrd(globalOrd));
collectExistingBucket(doc, createBucketOrd(globalOrd));
}
}

Expand Down