Skip to content

Commit

Permalink
Aggregations: Fix geohash grid aggregation on multi-valued fields.
Browse files Browse the repository at this point in the history
This aggregation creates an anonymous fielddata instance that takes geo points
and turns them into a geo hash encoded as a long. A bug was introduced in 1.4
because of a fielddata refactoring: the fielddata instance tries to populate
an array with values without first making sure that it is large enough.

Closes #8507
  • Loading branch information
jpountz committed Nov 20, 2014
1 parent f30a0e8 commit a94fb92
Show file tree
Hide file tree
Showing 5 changed files with 89 additions and 29 deletions.
Expand Up @@ -29,7 +29,7 @@
*/
public abstract class SortingNumericDocValues extends SortedNumericDocValues {

protected int count;
private int count;
protected long[] values;
private final Sorter sorter;

Expand All @@ -52,9 +52,11 @@ protected int compare(int i, int j) {
}

/**
* Make sure the {@link #values} array can store at least {@link #count} entries.
* Set the {@link #count()} and ensure that the {@link #values} array can
* store at least that many entries.
*/
protected final void grow() {
protected final void resize(int newSize) {
count = newSize;
// Grow the backing array so it can hold at least `count` entries before any
// caller writes into values[i] — the missing grow was the bug fixed here.
values = ArrayUtil.grow(values, count);
}

Expand Down
Expand Up @@ -149,8 +149,8 @@ protected CellValues(ValuesSource.GeoPoint geoPointValues, int precision) {
public void setDocument(int docId) {
geoValues = geoPointValues.geoPointValues();
geoValues.setDocument(docId);
count = geoValues.count();
for (int i = 0; i < count; ++i) {
resize(geoValues.count());
for (int i = 0; i < count(); ++i) {
GeoPoint target = geoValues.valueAt(i);
values[i] = GeoHashUtils.encodeAsLong(target.getLat(), target.getLon(), precision);
}
Expand Down
Expand Up @@ -484,9 +484,8 @@ public LongValues(Numeric source, SearchScript script) {
public void setDocument(int docId) {
script.setNextDocId(docId);
source.longValues().setDocument(docId);
count = source.longValues().count();
grow();
for (int i = 0; i < count; ++i) {
resize(source.longValues().count());
for (int i = 0; i < count(); ++i) {
script.setNextVar("_value", source.longValues().valueAt(i));
values[i] = script.runAsLong();
}
Expand Down
Expand Up @@ -51,30 +51,28 @@ public void setDocument(int docId) {
final Object value = script.run();

if (value == null) {
count = 0;
resize(0);
}

else if (value instanceof Number) {
count = 1;
resize(1);
values[0] = ((Number) value).longValue();
}

else if (value.getClass().isArray()) {
count = Array.getLength(value);
grow();
for (int i = 0; i < count; ++i) {
resize(Array.getLength(value));
for (int i = 0; i < count(); ++i) {
values[i] = ((Number) Array.get(value, i)).longValue();
}
}

else if (value instanceof Collection) {
count = ((Collection<?>) value).size();
grow();
resize(((Collection<?>) value).size());
int i = 0;
for (Iterator<?> it = ((Collection<?>) value).iterator(); it.hasNext(); ++i) {
values[i] = ((Number) it.next()).longValue();
}
assert i == count;
assert i == count();
}

else {
Expand Down
Expand Up @@ -33,8 +33,11 @@
import org.junit.Test;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Random;
import java.util.Set;

import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.elasticsearch.search.aggregations.AggregationBuilders.geohashGrid;
Expand All @@ -46,39 +49,43 @@
@ElasticsearchIntegrationTest.SuiteScopeTest
public class GeoHashGridTests extends ElasticsearchIntegrationTest {

private IndexRequestBuilder indexCity(String name, String latLon) throws Exception {
// Expected per-geohash doc counts for the single-valued "idx" index.
static ObjectIntMap<String> expectedDocCountsForGeoHash = null;
// Expected per-geohash doc counts for the "multi_valued_idx" index.
static ObjectIntMap<String> multiValuedExpectedDocCountsForGeoHash = null;
// Points are indexed at the finest geohash precision (12 characters).
static int highestPrecisionGeohash = 12;
// Number of documents indexed into each index.
static int numDocs = 100;

// NOTE(review): assigned in setup code hidden by the collapsed diff hunk — confirm there.
static String smallestGeoHash = null;

/**
 * Builds an index request for a city document. The optional {@code latLon} list
 * becomes the (possibly multi-valued) "location" field. This commit generalized
 * the helper to take the target index name and a list of points.
 */
private static IndexRequestBuilder indexCity(String index, String name, List<String> latLon) throws Exception {
XContentBuilder source = jsonBuilder().startObject().field("city", name);
if (latLon != null) {
source = source.field("location", latLon);
}
source = source.endObject();
// NOTE(review): the next line is the pre-commit version left visible by the diff
// rendering; the line after it is its replacement parameterizing the index name.
return client().prepareIndex("idx", "type").setSource(source);
return client().prepareIndex(index, "type").setSource(source);
}


static ObjectIntMap<String> expectedDocCountsForGeoHash = null;
static int highestPrecisionGeohash = 12;
static int numRandomPoints = 100;

static String smallestGeoHash = null;
/** Convenience overload: index a city with a single lat/lon point. */
private static IndexRequestBuilder indexCity(String index, String name, String latLon) throws Exception {
// Wrap the single point in a one-element list and delegate to the multi-valued variant.
return indexCity(index, name, Arrays.asList(latLon));
}

@Override
public void setupSuiteScopeCluster() throws Exception {
createIndex("idx_unmapped");

assertAcked(prepareCreate("idx")
.addMapping("type", "location", "type=geo_point", "city", "type=string,index=not_analyzed"));

createIndex("idx_unmapped");

List<IndexRequestBuilder> cities = new ArrayList<>();
Random random = getRandom();
expectedDocCountsForGeoHash = new ObjectIntOpenHashMap<>(numRandomPoints * 2);
for (int i = 0; i < numRandomPoints; i++) {
expectedDocCountsForGeoHash = new ObjectIntOpenHashMap<>(numDocs * 2);
for (int i = 0; i < numDocs; i++) {
//generate random point
double lat = (180d * random.nextDouble()) - 90d;
double lng = (360d * random.nextDouble()) - 180d;
String randomGeoHash = GeoHashUtils.encode(lat, lng, highestPrecisionGeohash);
//Index at the highest resolution
cities.add(indexCity(randomGeoHash, lat + ", " + lng));
cities.add(indexCity("idx", randomGeoHash, lat + ", " + lng));
expectedDocCountsForGeoHash.put(randomGeoHash, expectedDocCountsForGeoHash.getOrDefault(randomGeoHash, 0) + 1);
//Update expected doc counts for all resolutions..
for (int precision = highestPrecisionGeohash - 1; precision > 0; precision--) {
Expand All @@ -90,6 +97,35 @@ public void setupSuiteScopeCluster() throws Exception {
}
}
indexRandom(true, cities);

assertAcked(prepareCreate("multi_valued_idx")
.addMapping("type", "location", "type=geo_point", "city", "type=string,index=not_analyzed"));

cities = new ArrayList<>();
multiValuedExpectedDocCountsForGeoHash = new ObjectIntOpenHashMap<>(numDocs * 2);
for (int i = 0; i < numDocs; i++) {
final int numPoints = random.nextInt(4);
List<String> points = new ArrayList<>();
// TODO (#8512): this should be a Set, not a List. Currently if a document has two positions that have
// the same geo hash, it will increase the doc_count for this geo hash by 2 instead of 1
List<String> geoHashes = new ArrayList<>();
for (int j = 0; j < numPoints; ++j) {
double lat = (180d * random.nextDouble()) - 90d;
double lng = (360d * random.nextDouble()) - 180d;
points.add(lat + "," + lng);
// Update expected doc counts for all resolutions..
for (int precision = highestPrecisionGeohash; precision > 0; precision--) {
final String geoHash = GeoHashUtils.encode(lat, lng, precision);
geoHashes.add(geoHash);
}
}
cities.add(indexCity("multi_valued_idx", Integer.toString(i), points));
for (String hash : geoHashes) {
multiValuedExpectedDocCountsForGeoHash.put(hash, multiValuedExpectedDocCountsForGeoHash.getOrDefault(hash, 0) + 1);
}
}
indexRandom(true, cities);

ensureSearchable();
}

Expand Down Expand Up @@ -119,6 +155,31 @@ public void simple() throws Exception {
}
}

@Test
public void multivalued() throws Exception {
// Verify geohash-grid doc counts on the multi-valued "location" field at every precision.
for (int precision = 1; precision <= highestPrecisionGeohash; precision++) {
SearchResponse response = client().prepareSearch("multi_valued_idx")
.addAggregation(geohashGrid("geohashgrid")
.field("location")
.precision(precision)
)
.execute().actionGet();

assertSearchResponse(response);

GeoHashGrid geoGrid = response.getAggregations().get("geohashgrid");
for (GeoHashGrid.Bucket cell : geoGrid.getBuckets()) {
String geohash = cell.getKey();

long bucketCount = cell.getDocCount();
int expectedBucketCount = multiValuedExpectedDocCountsForGeoHash.get(geohash);
// BUG FIX: assertNotSame(bucketCount, 0) autoboxes to Long/Integer and compares
// references, so it can never fail. Compare the value instead so an empty
// bucket actually trips the assertion.
assertTrue("Geohash " + geohash + " has an empty bucket", bucketCount != 0);
assertEquals("Geohash " + geohash + " has wrong doc count ",
expectedBucketCount, bucketCount);
}
}
}

@Test
public void filtered() throws Exception {
GeoBoundingBoxFilterBuilder bbox = new GeoBoundingBoxFilterBuilder("location");
Expand Down

0 comments on commit a94fb92

Please sign in to comment.