diff --git a/oshdb-api/pom.xml b/oshdb-api/pom.xml
index 513ac6d6e..53ee3c72e 100644
--- a/oshdb-api/pom.xml
+++ b/oshdb-api/pom.xml
@@ -90,6 +90,12 @@
annotations
${jetbrainsannotations.version}
+
+
+      <groupId>com.tdunning</groupId>
+      <artifactId>t-digest</artifactId>
+      <version>3.2</version>
+
diff --git a/oshdb-api/src/main/java/org/heigit/bigspatialdata/oshdb/api/mapreducer/MapAggregator.java b/oshdb-api/src/main/java/org/heigit/bigspatialdata/oshdb/api/mapreducer/MapAggregator.java
index f62e06d45..38e364730 100644
--- a/oshdb-api/src/main/java/org/heigit/bigspatialdata/oshdb/api/mapreducer/MapAggregator.java
+++ b/oshdb-api/src/main/java/org/heigit/bigspatialdata/oshdb/api/mapreducer/MapAggregator.java
@@ -1,10 +1,14 @@
package org.heigit.bigspatialdata.oshdb.api.mapreducer;
import com.google.common.collect.Lists;
+import com.tdunning.math.stats.TDigest;
import com.vividsolutions.jts.geom.Geometry;
import com.vividsolutions.jts.geom.Polygonal;
import java.util.Map.Entry;
+import java.util.function.DoubleUnaryOperator;
+import java.util.function.Function;
import java.util.stream.Stream;
+import java.util.stream.StreamSupport;
import org.apache.commons.lang3.tuple.MutablePair;
import org.apache.commons.lang3.tuple.Pair;
import org.heigit.bigspatialdata.oshdb.api.generic.*;
@@ -19,7 +23,6 @@
import org.heigit.bigspatialdata.oshdb.util.OSHDBTimestamp;
import org.heigit.bigspatialdata.oshdb.util.tagtranslator.OSMTag;
import org.heigit.bigspatialdata.oshdb.util.tagtranslator.OSMTagInterface;
-import org.heigit.bigspatialdata.oshdb.util.tagtranslator.OSMTagKey;
import org.jetbrains.annotations.Contract;
import java.util.*;
@@ -416,14 +419,7 @@ public SortedMap> uniq(SerializableFunction mapper) throws E
*/
@Contract(pure = true)
public SortedMap countUniq() throws Exception {
- return this
- .uniq().entrySet().stream()
- .collect(Collectors.toMap(
- Map.Entry::getKey,
- e -> e.getValue().size(),
- (v1, v2) -> v1, // can't happen, actually since input is already a map
- TreeMap::new
- ));
+ return transformSortedMap(this.uniq(), Set::size);
}
/**
@@ -463,9 +459,8 @@ public SortedMap average(SerializableFunction weightedAverage(SerializableFunction mapper) throws Exception {
- return this
- .map(mapper)
- .reduce(
+ return transformSortedMap(
+ this.map(mapper).reduce(
() -> new PayloadWithWeight<>(0.0,0.0),
(acc, cur) -> {
acc.num = NumberUtils.add(acc.num, cur.getValue().doubleValue()*cur.getWeight());
@@ -473,12 +468,151 @@ public SortedMap weightedAverage(SerializableFunction new PayloadWithWeight<>(NumberUtils.add(a.num, b.num), a.weight+b.weight)
- ).entrySet().stream().collect(Collectors.toMap(
- Map.Entry::getKey,
- e -> e.getValue().num / e.getValue().weight,
- (v1, v2) -> v1,
- TreeMap::new
- ));
+ ),
+ x -> x.num / x.weight
+ );
+ }
+
+ /**
+ * Returns an estimate of the median of the results, i.e. the estimated 0.5 quantile.
+ *
+ * Uses the t-digest algorithm to calculate estimates for the quantiles in a map-reduce system:
+ * https://raw.githubusercontent.com/tdunning/t-digest/master/docs/t-digest-paper/histo.pdf
+ *
+ * @return a sorted map holding the estimated median for each key
+ */
+ @Contract(pure = true)
+ public SortedMap estimatedMedian() throws Exception {
+ return this.estimatedQuantile(0.5);
+ }
+
+ /**
+ * Returns an estimate of the median of the results after applying the given map function.
+ *
+ * Uses the t-digest algorithm to calculate estimates for the quantiles in a map-reduce system:
+ * https://raw.githubusercontent.com/tdunning/t-digest/master/docs/t-digest-paper/histo.pdf
+ *
+ * @param mapper function that returns the numbers to generate the median for
+ * @return a sorted map holding the estimated median for each key
+ */
+ @Contract(pure = true)
+ public SortedMap estimatedMedian(SerializableFunction mapper) throws Exception {
+ return this.estimatedQuantile(mapper, 0.5);
+ }
+
+ /**
+ * Returns an estimate of a requested quantile of the results.
+ *
+ * Uses the t-digest algorithm to calculate estimates for the quantiles in a map-reduce system:
+ * https://raw.githubusercontent.com/tdunning/t-digest/master/docs/t-digest-paper/histo.pdf
+ *
+ * @param q the desired quantile to calculate (as a number between 0 and 1)
+ * @return a sorted map holding the estimated quantile boundary for each key
+ */
+ @Contract(pure = true)
+ public SortedMap estimatedQuantile(double q) throws Exception {
+ return this.makeNumeric().estimatedQuantile(n -> n, q);
+ }
+
+ /**
+ * Returns an estimate of a requested quantile of the results after applying the given map
+ * function.
+ *
+ * Uses the t-digest algorithm to calculate estimates for the quantiles in a map-reduce system:
+ * https://raw.githubusercontent.com/tdunning/t-digest/master/docs/t-digest-paper/histo.pdf
+ *
+ * @param mapper function that returns the numbers to generate the quantile for
+ * @param q the desired quantile to calculate (as a number between 0 and 1)
+ * @return a sorted map holding the estimated quantile boundary for each key
+ */
+ @Contract(pure = true)
+ public SortedMap estimatedQuantile(
+ SerializableFunction mapper,
+ double q
+ ) throws Exception {
+ return transformSortedMap(this.estimatedQuantiles(mapper), qFunction -> qFunction.applyAsDouble(q));
+ }
+
+ /**
+ * Returns an estimate of the quantiles of the results.
+ *
+ * Uses the t-digest algorithm to calculate estimates for the quantiles in a map-reduce system:
+ * https://raw.githubusercontent.com/tdunning/t-digest/master/docs/t-digest-paper/histo.pdf
+ *
+ * @param q the desired quantiles to calculate (as a collection of numbers between 0 and 1)
+ * @return a sorted map holding the list of estimated quantile boundaries for each key
+ */
+ @Contract(pure = true)
+ public SortedMap> estimatedQuantiles(Iterable q) throws Exception {
+ return this.makeNumeric().estimatedQuantiles(n -> n, q);
+ }
+
+ /**
+ * Returns an estimate of the quantiles of the results after applying the given map function.
+ *
+ * Uses the t-digest algorithm to calculate estimates for the quantiles in a map-reduce system:
+ * https://raw.githubusercontent.com/tdunning/t-digest/master/docs/t-digest-paper/histo.pdf
+ *
+ * @param mapper function that returns the numbers to generate the quantiles for
+ * @param q the desired quantiles to calculate (as a collection of numbers between 0 and 1)
+ * @return a sorted map holding, for each key, the estimated boundaries in the iteration order of q
+ */
+ @Contract(pure = true)
+ public SortedMap> estimatedQuantiles(
+ SerializableFunction mapper,
+ Iterable q
+ ) throws Exception {
+ return transformSortedMap(
+ this.estimatedQuantiles(mapper),
+ quantileFunction -> StreamSupport.stream(q.spliterator(), false)
+ .mapToDouble(Double::doubleValue)
+ .map(quantileFunction)
+ .boxed()
+ .collect(Collectors.toList())
+ );
+ }
+
+ /**
+ * Returns a function that computes estimates of arbitrary quantiles of the results.
+ *
+ * Uses the t-digest algorithm to calculate estimates for the quantiles in a map-reduce system:
+ * https://raw.githubusercontent.com/tdunning/t-digest/master/docs/t-digest-paper/histo.pdf
+ *
+ * @return a sorted map holding, for each key, a function computing estimated quantile boundaries
+ */
+ @Contract(pure = true)
+ public SortedMap estimatedQuantiles() throws Exception {
+ return this.makeNumeric().estimatedQuantiles(n -> n);
+ }
+
+ /**
+ * Returns a function that computes estimates of arbitrary quantiles of the results after applying
+ * the given map function.
+ *
+ * Uses the t-digest algorithm to calculate estimates for the quantiles in a map-reduce system:
+ * https://raw.githubusercontent.com/tdunning/t-digest/master/docs/t-digest-paper/histo.pdf
+ *
+ * @param mapper function that returns the numbers to generate the quantiles for
+ * @return a sorted map holding, for each key, a function computing estimated quantile boundaries
+ */
+ @Contract(pure = true)
+ public SortedMap estimatedQuantiles(
+ SerializableFunction mapper
+ ) throws Exception {
+ return transformSortedMap(this.digest(mapper), d -> d::quantile);
+ }
+
+ /**
+ * Generates the t-digests of the complete (mapped) result set, returned as a sorted map. See:
+ * https://raw.githubusercontent.com/tdunning/t-digest/master/docs/t-digest-paper/histo.pdf
+ */
+ @Contract(pure = true)
+ private SortedMap digest(SerializableFunction mapper) throws Exception {
+ return this.map(mapper).reduce(
+ TDigestReducer::identitySupplier,
+ TDigestReducer::accumulator,
+ TDigestReducer::combiner
+ );
}
// -----------------------------------------------------------------------------------------------
@@ -732,4 +866,14 @@ private Collection> _completeZerofill(Set> keys, List> zerofil
).collect(Collectors.toList());
}
}
+
+ // Transforms the values of a sorted map by a given function (similar to Stream::map); keys are kept.
+ private SortedMap transformSortedMap(SortedMap in, Function transform) {
+ return in.entrySet().stream().collect(Collectors.toMap(
+ Entry::getKey,
+ e -> transform.apply(e.getValue()),
+ (v1, v2) -> { assert false; return v1; }, // not expected to run: the keys of a map are unique
+ TreeMap::new
+ ));
+ }
}
diff --git a/oshdb-api/src/main/java/org/heigit/bigspatialdata/oshdb/api/mapreducer/MapReducer.java b/oshdb-api/src/main/java/org/heigit/bigspatialdata/oshdb/api/mapreducer/MapReducer.java
index 38eb5e61c..afd5f3fad 100644
--- a/oshdb-api/src/main/java/org/heigit/bigspatialdata/oshdb/api/mapreducer/MapReducer.java
+++ b/oshdb-api/src/main/java/org/heigit/bigspatialdata/oshdb/api/mapreducer/MapReducer.java
@@ -1,8 +1,13 @@
package org.heigit.bigspatialdata.oshdb.api.mapreducer;
import com.google.common.collect.Iterables;
+import com.tdunning.math.stats.MergingDigest;
+import com.tdunning.math.stats.TDigest;
import java.sql.Connection;
+import java.util.function.DoubleUnaryOperator;
+import java.util.stream.Collectors;
import java.util.stream.Stream;
+import java.util.stream.StreamSupport;
import org.heigit.bigspatialdata.oshdb.util.celliterator.CellIterator;
import org.heigit.bigspatialdata.oshdb.util.exceptions.OSHDBKeytablesNotFoundException;
import org.heigit.bigspatialdata.oshdb.util.geometry.OSHDBGeometryBuilder;
@@ -1136,6 +1141,144 @@ public Double weightedAverage(SerializableFunction mapper) thr
return runningSums.num / runningSums.weight;
}
+ /**
+ * Returns an estimate of the median of the results, i.e. the estimated 0.5 quantile.
+ *
+ * Uses the t-digest algorithm to calculate estimates for the quantiles in a map-reduce system:
+ * https://raw.githubusercontent.com/tdunning/t-digest/master/docs/t-digest-paper/histo.pdf
+ *
+ * @return estimated median
+ */
+ @Contract(pure = true)
+ public Double estimatedMedian() throws Exception {
+ return this.estimatedQuantile(0.5);
+ }
+
+ /**
+ * Returns an estimate of the median of the results after applying the given map function.
+ *
+ * Uses the t-digest algorithm to calculate estimates for the quantiles in a map-reduce system:
+ * https://raw.githubusercontent.com/tdunning/t-digest/master/docs/t-digest-paper/histo.pdf
+ *
+ * @param mapper function that returns the numbers to generate the median for
+ * @return estimated median
+ */
+ @Contract(pure = true)
+ public Double estimatedMedian(SerializableFunction mapper) throws Exception {
+ return this.estimatedQuantile(mapper, 0.5);
+ }
+
+ /**
+ * Returns an estimate of a requested quantile of the results.
+ *
+ * Uses the t-digest algorithm to calculate estimates for the quantiles in a map-reduce system:
+ * https://raw.githubusercontent.com/tdunning/t-digest/master/docs/t-digest-paper/histo.pdf
+ *
+ * @param q the desired quantile to calculate (as a number between 0 and 1)
+ * @return estimated quantile boundary
+ */
+ @Contract(pure = true)
+ public Double estimatedQuantile(double q) throws Exception {
+ return this.makeNumeric().estimatedQuantile(n -> n, q);
+ }
+
+ /**
+ * Returns an estimate of a requested quantile of the results after applying the given map
+ * function.
+ *
+ * Uses the t-digest algorithm to calculate estimates for the quantiles in a map-reduce system:
+ * https://raw.githubusercontent.com/tdunning/t-digest/master/docs/t-digest-paper/histo.pdf
+ *
+ * @param mapper function that returns the numbers to generate the quantile for
+ * @param q the desired quantile to calculate (as a number between 0 and 1)
+ * @return estimated quantile boundary
+ */
+ @Contract(pure = true)
+ public Double estimatedQuantile(SerializableFunction mapper, double q)
+ throws Exception {
+ return this.estimatedQuantiles(mapper).applyAsDouble(q);
+ }
+
+ /**
+ * Returns an estimate of the quantiles of the results.
+ *
+ * Uses the t-digest algorithm to calculate estimates for the quantiles in a map-reduce system:
+ * https://raw.githubusercontent.com/tdunning/t-digest/master/docs/t-digest-paper/histo.pdf
+ *
+ * @param q the desired quantiles to calculate (as a collection of numbers between 0 and 1)
+ * @return estimated quantile boundaries
+ */
+ @Contract(pure = true)
+ public List estimatedQuantiles(Iterable q) throws Exception {
+ return this.makeNumeric().estimatedQuantiles(n -> n, q);
+ }
+
+ /**
+ * Returns an estimate of the quantiles of the results after applying the given map function.
+ *
+ * Uses the t-digest algorithm to calculate estimates for the quantiles in a map-reduce system:
+ * https://raw.githubusercontent.com/tdunning/t-digest/master/docs/t-digest-paper/histo.pdf
+ *
+ * @param mapper function that returns the numbers to generate the quantiles for
+ * @param q the desired quantiles to calculate (as a collection of numbers between 0 and 1)
+ * @return estimated quantile boundaries, in the iteration order of q
+ */
+ @Contract(pure = true)
+ public List estimatedQuantiles(
+ SerializableFunction mapper,
+ Iterable q
+ ) throws Exception {
+ return StreamSupport.stream(q.spliterator(), false)
+ .mapToDouble(Double::doubleValue)
+ .map(this.estimatedQuantiles(mapper)) // the quantile function is obtained once, then applied to each q
+ .boxed()
+ .collect(Collectors.toList());
+ }
+
+ /**
+ * Returns a function that computes estimates of arbitrary quantiles of the results.
+ *
+ * Uses the t-digest algorithm to calculate estimates for the quantiles in a map-reduce system:
+ * https://raw.githubusercontent.com/tdunning/t-digest/master/docs/t-digest-paper/histo.pdf
+ *
+ * @return a function that computes estimated quantile boundaries
+ */
+ @Contract(pure = true)
+ public DoubleUnaryOperator estimatedQuantiles() throws Exception {
+ return this.makeNumeric().estimatedQuantiles(n -> n);
+ }
+
+ /**
+ * Returns a function that computes estimates of arbitrary quantiles of the results after applying
+ * the given map function.
+ *
+ * Uses the t-digest algorithm to calculate estimates for the quantiles in a map-reduce system:
+ * https://raw.githubusercontent.com/tdunning/t-digest/master/docs/t-digest-paper/histo.pdf
+ *
+ * @param mapper function that returns the numbers to generate the quantiles for
+ * @return a function that computes estimated quantile boundaries
+ */
+ @Contract(pure = true)
+ public DoubleUnaryOperator estimatedQuantiles(
+ SerializableFunction mapper
+ ) throws Exception {
+ TDigest digest = this.digest(mapper);
+ return digest::quantile; // the returned function queries the digest built above
+ }
+
+ /**
+ * Generates the t-digest of the complete (mapped) result set. See:
+ * https://raw.githubusercontent.com/tdunning/t-digest/master/docs/t-digest-paper/histo.pdf
+ */
+ @Contract(pure = true)
+ private TDigest digest(SerializableFunction mapper) throws Exception {
+ return this.map(mapper).reduce(
+ TDigestReducer::identitySupplier,
+ TDigestReducer::accumulator,
+ TDigestReducer::combiner
+ );
+ }
+
// -----------------------------------------------------------------------------------------------
// "Iterator" like helpers (stream, collect)
// -----------------------------------------------------------------------------------------------
@@ -1487,6 +1630,9 @@ protected TagInterpreter _getTagInterpreter() throws ParseException, SQLExceptio
protected TagTranslator _getTagTranslator() {
if (this._tagTranslator == null) {
try {
+ if (this._oshdbForTags == null) {
+ throw new OSHDBKeytablesNotFoundException();
+ }
this._tagTranslator = new TagTranslator(this._oshdbForTags.getConnection());
} catch (OSHDBKeytablesNotFoundException e) {
LOG.error(e.getMessage());
@@ -1647,3 +1793,36 @@ public Object apply(Object o) {
return this.mapper.apply(o);
}
}
+
+class TDigestReducer { // identity, accumulator and combiner functions used to build t-digests via reduce()
+
+ /**
+ * A COMPRESSION parameter of 1000 should provide relatively precise results, while not being
+ * too demanding on memory usage. See page 20 in the paper [1]:
+ *
+ * > Compression parameter (1/δ) was […] 1000 in order to reliably achieve 0.1% accuracy
+ *
+ * [1] https://raw.githubusercontent.com/tdunning/t-digest/master/docs/t-digest-paper/histo.pdf
+ */
+ private final static int COMPRESSION = 1000;
+
+ static TDigest identitySupplier() { // creates a new digest using the COMPRESSION setting
+ return new MergingDigest(COMPRESSION);
+ }
+
+ static TDigest accumulator(TDigest acc, R cur) { // feeds one value into the given digest and returns it
+ acc.add(cur.doubleValue(), 1);
+ return acc;
+ }
+
+ static TDigest combiner(TDigest a, TDigest b) { // combines two partial digests into one
+ if (a.size() == 0) { // nothing recorded in a: b already is the combined result
+ return b;
+ } else if (b.size() == 0) { // nothing recorded in b: a already is the combined result
+ return a;
+ }
+ MergingDigest r = new MergingDigest(COMPRESSION); // otherwise add both inputs to a fresh digest
+ r.add(Arrays.asList(a, b));
+ return r;
+ }
+}
\ No newline at end of file
diff --git a/oshdb-api/src/main/java/org/heigit/bigspatialdata/oshdb/api/mapreducer/MapReducerAggregations.java b/oshdb-api/src/main/java/org/heigit/bigspatialdata/oshdb/api/mapreducer/MapReducerAggregations.java
index 17254ae8a..2dbcc378f 100644
--- a/oshdb-api/src/main/java/org/heigit/bigspatialdata/oshdb/api/mapreducer/MapReducerAggregations.java
+++ b/oshdb-api/src/main/java/org/heigit/bigspatialdata/oshdb/api/mapreducer/MapReducerAggregations.java
@@ -144,10 +144,110 @@ interface MapReducerAggregations {
*/
Object weightedAverage(SerializableFunction mapper) throws Exception;
+ /**
+ * Returns an estimate of the median of the results.
+ *
+ * Uses the t-digest algorithm to calculate estimates for the quantiles in a map-reduce system:
+ * https://raw.githubusercontent.com/tdunning/t-digest/master/docs/t-digest-paper/histo.pdf
+ *
+ * @return estimated median
+ */
+ Object estimatedMedian() throws Exception;
+
+ /**
+ * Returns an estimate of the median of the results after applying the given map function.
+ *
+ * Uses the t-digest algorithm to calculate estimates for the quantiles in a map-reduce system:
+ * https://raw.githubusercontent.com/tdunning/t-digest/master/docs/t-digest-paper/histo.pdf
+ *
+ * @param mapper function that returns the numbers to generate the median for
+ * @return estimated median
+ */
+ Object estimatedMedian(SerializableFunction mapper) throws Exception;
+
+ /**
+ * Returns an estimate of a requested quantile of the results.
+ *
+ * Uses the t-digest algorithm to calculate estimates for the quantiles in a map-reduce system:
+ * https://raw.githubusercontent.com/tdunning/t-digest/master/docs/t-digest-paper/histo.pdf
+ *
+ * @param q the desired quantile to calculate (as a number between 0 and 1)
+ * @return estimated quantile boundary
+ */
+ Object estimatedQuantile(double q) throws Exception;
+
+ /**
+ * Returns an estimate of a requested quantile of the results after applying the given map
+ * function.
+ *
+ * Uses the t-digest algorithm to calculate estimates for the quantiles in a map-reduce system:
+ * https://raw.githubusercontent.com/tdunning/t-digest/master/docs/t-digest-paper/histo.pdf
+ *
+ * @param mapper function that returns the numbers to generate the quantile for
+ * @param q the desired quantile to calculate (as a number between 0 and 1)
+ * @return estimated quantile boundary
+ */
+ Object estimatedQuantile(SerializableFunction mapper, double q) throws Exception;
+
+ /**
+ * Returns an estimate of the quantiles of the results.
+ *
+ * Uses the t-digest algorithm to calculate estimates for the quantiles in a map-reduce system:
+ * https://raw.githubusercontent.com/tdunning/t-digest/master/docs/t-digest-paper/histo.pdf
+ *
+ * @param q the desired quantiles to calculate (as a collection of numbers between 0 and 1)
+ * @return estimated quantile boundaries
+ */
+ Object estimatedQuantiles(Iterable q) throws Exception;
+
+ /**
+ * Returns an estimate of the quantiles of the results after applying the given map function.
+ *
+ * Uses the t-digest algorithm to calculate estimates for the quantiles in a map-reduce system:
+ * https://raw.githubusercontent.com/tdunning/t-digest/master/docs/t-digest-paper/histo.pdf
+ *
+ * @param mapper function that returns the numbers to generate the quantiles for
+ * @param q the desired quantiles to calculate (as a collection of numbers between 0 and 1)
+ * @return estimated quantile boundaries
+ */
+ Object estimatedQuantiles(
+ SerializableFunction mapper,
+ Iterable q
+ ) throws Exception;
+
+ /**
+ * Returns a function that computes estimates of arbitrary quantiles of the results.
+ *
+ * Uses the t-digest algorithm to calculate estimates for the quantiles in a map-reduce system:
+ * https://raw.githubusercontent.com/tdunning/t-digest/master/docs/t-digest-paper/histo.pdf
+ *
+ * @return a function that computes estimated quantile boundaries
+ */
+ Object estimatedQuantiles() throws Exception;
+
+ /**
+ * Returns a function that computes estimates of arbitrary quantiles of the results after applying
+ * the given map function.
+ *
+ * Uses the t-digest algorithm to calculate estimates for the quantiles in a map-reduce system:
+ * https://raw.githubusercontent.com/tdunning/t-digest/master/docs/t-digest-paper/histo.pdf
+ *
+ * @param mapper function that returns the numbers to generate the quantiles for
+ * @return a function that computes estimated quantile boundaries
+ */
+ Object estimatedQuantiles(SerializableFunction mapper) throws Exception;
+
/**
* Collects all results into List(s)
*
* @return list(s) with all results returned by the `mapper` function
*/
Object collect() throws Exception;
+
+ /**
+ * Returns all results as a Stream.
+ *
+ * @return a stream with all results returned by the `mapper` function
+ */
+ Object stream() throws Exception;
}
diff --git a/oshdb-api/src/test/java/org/heigit/bigspatialdata/oshdb/api/tests/TestQuantiles.java b/oshdb-api/src/test/java/org/heigit/bigspatialdata/oshdb/api/tests/TestQuantiles.java
new file mode 100644
index 000000000..42a6ecede
--- /dev/null
+++ b/oshdb-api/src/test/java/org/heigit/bigspatialdata/oshdb/api/tests/TestQuantiles.java
@@ -0,0 +1,200 @@
+/*
+ * To change this license header, choose License Headers in Project Properties.
+ * To change this template file, choose Tools | Templates
+ * and open the template in the editor.
+ */
+package org.heigit.bigspatialdata.oshdb.api.tests;
+
+import static org.junit.Assert.assertEquals;
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.SortedMap;
+import java.util.function.DoubleUnaryOperator;
+import org.heigit.bigspatialdata.oshdb.api.db.OSHDBDatabase;
+import org.heigit.bigspatialdata.oshdb.api.db.OSHDBH2;
+import org.heigit.bigspatialdata.oshdb.api.mapreducer.MapAggregator;
+import org.heigit.bigspatialdata.oshdb.api.mapreducer.MapReducer;
+import org.heigit.bigspatialdata.oshdb.api.mapreducer.OSMEntitySnapshotView;
+import org.heigit.bigspatialdata.oshdb.api.object.OSMEntitySnapshot;
+import org.heigit.bigspatialdata.oshdb.osm.OSMType;
+import org.heigit.bigspatialdata.oshdb.util.OSHDBBoundingBox;
+import org.heigit.bigspatialdata.oshdb.util.OSHDBTimestamp;
+import org.heigit.bigspatialdata.oshdb.util.time.OSHDBTimestamps;
+import org.junit.Test;
+
+/**
+ * Tests the estimated median/quantile aggregations of MapReducer and MapAggregator.
+ */
+public class TestQuantiles {
+  private final OSHDBDatabase oshdb;
+
+  private final OSHDBBoundingBox bbox = new OSHDBBoundingBox(8.651133,49.387611,8.6561,49.390513);
+  private final OSHDBTimestamps timestamps1 = new OSHDBTimestamps("2015-01-01");
+  private final OSHDBTimestamps timestamps2 = new OSHDBTimestamps("2014-01-01", "2015-01-01");
+
+  private static final double REQUIRED_ACCURACY = 1E-4; // relative tolerance of the estimates
+
+  public TestQuantiles() throws Exception {
+    oshdb = new OSHDBH2("./src/test/resources/test-data");
+  }
+
+  private void assertApproximateQuantiles(
+      List<? extends Number> values, double quantile, double result) {
+    // expected value: linear interpolation between neighbours in the ascendingly sorted values
+    double quantileIndex = (values.size() - 1) * quantile;
+    int quantileBoundLower = (int) Math.floor(quantileIndex);
+    double quantileAmountUpper = quantileIndex - quantileBoundLower;
+    int quantileBoundUpper = (int) Math.ceil(quantileIndex);
+    double quantileAmountLower = 1 - quantileAmountUpper;
+    double expectedResult = (
+        quantileAmountLower * values.get(quantileBoundLower).doubleValue() +
+        quantileAmountUpper * values.get(quantileBoundUpper).doubleValue()
+    );
+
+    assertEquals(expectedResult, result, Math.abs(expectedResult) * REQUIRED_ACCURACY);
+  }
+
+  // MapReducer
+
+  private MapReducer<OSMEntitySnapshot> createMapReducer() {
+    return OSMEntitySnapshotView.on(oshdb)
+        .timestamps(timestamps1)
+        .osmType(OSMType.WAY)
+        .osmTag("building", "yes")
+        .areaOfInterest(bbox);
+  }
+
+  @Test
+  public void testMedian() throws Exception {
+    MapReducer<Integer> mr = this.createMapReducer()
+        .map(s -> s.getGeometry().getCoordinates().length);
+    List<Integer> fullResult = mr.collect();
+    Collections.sort(fullResult);
+
+    assertApproximateQuantiles(fullResult, 0.5, mr.estimatedMedian());
+  }
+
+  @Test
+  public void testQuantile() throws Exception {
+    MapReducer<Integer> mr = this.createMapReducer()
+        .map(s -> s.getGeometry().getCoordinates().length);
+    List<Integer> fullResult = mr.collect();
+    Collections.sort(fullResult);
+
+    assertApproximateQuantiles(fullResult, 0.8, mr.estimatedQuantile(0.8));
+  }
+
+  @Test
+  public void testQuantiles() throws Exception {
+    MapReducer<Integer> mr = this.createMapReducer()
+        .map(s -> s.getGeometry().getCoordinates().length);
+    List<Integer> fullResult = mr.collect();
+    Collections.sort(fullResult);
+
+    List<Double> qs = Arrays.asList(0.0, 0.2, 0.4, 0.6, 0.8, 1.0);
+    List<Double> quantiles = mr.estimatedQuantiles(qs);
+
+    for (int i = 0; i < qs.size(); i++) { // pair by position: equal estimates must not be mixed up
+      assertApproximateQuantiles(fullResult, qs.get(i), quantiles.get(i));
+    }
+  }
+
+  @Test
+  public void testQuantilesFunction() throws Exception {
+    MapReducer<Integer> mr = this.createMapReducer()
+        .map(s -> s.getGeometry().getCoordinates().length);
+    List<Integer> fullResult = mr.collect();
+    Collections.sort(fullResult);
+
+    List<Double> qs = Arrays.asList(0.0, 0.2, 0.4, 0.6, 0.8, 1.0);
+    DoubleUnaryOperator quantilesFunction = mr.estimatedQuantiles();
+
+    for (Double q : qs) {
+      assertApproximateQuantiles(fullResult, q, quantilesFunction.applyAsDouble(q));
+    }
+  }
+
+  // MapAggregator
+
+  private MapAggregator<OSHDBTimestamp, OSMEntitySnapshot> createMapAggregator() {
+    return OSMEntitySnapshotView.on(oshdb)
+        .timestamps(timestamps2)
+        .osmType(OSMType.WAY)
+        .osmTag("building", "yes")
+        .areaOfInterest(bbox)
+        .aggregateByTimestamp();
+  }
+
+  @Test
+  public void testMedianMapAggregator() throws Exception {
+    MapAggregator<OSHDBTimestamp, Integer> mr = this.createMapAggregator()
+        .map(s -> s.getGeometry().getCoordinates().length);
+    SortedMap<OSHDBTimestamp, List<Integer>> fullResult = mr.collect();
+    fullResult.values().forEach(Collections::sort);
+
+    SortedMap<OSHDBTimestamp, Double> medians = mr.estimatedMedian(); // median == 0.5 quantile
+
+    medians.forEach((ts, median) ->
+        assertApproximateQuantiles(fullResult.get(ts), 0.5, median)
+    );
+  }
+
+  @Test
+  public void testQuantileMapAggregator() throws Exception {
+    MapAggregator<OSHDBTimestamp, Integer> mr = this.createMapAggregator()
+        .map(s -> s.getGeometry().getCoordinates().length);
+    SortedMap<OSHDBTimestamp, List<Integer>> fullResult = mr.collect();
+    fullResult.values().forEach(Collections::sort);
+
+    SortedMap<OSHDBTimestamp, Double> quantiles = mr.estimatedQuantile(0.8);
+
+    quantiles.forEach((ts, quantile) ->
+        assertApproximateQuantiles(fullResult.get(ts), 0.8, quantile)
+    );
+  }
+
+  @Test
+  public void testQuantilesMapAggregator() throws Exception {
+    MapAggregator<OSHDBTimestamp, Integer> mr = this.createMapAggregator()
+        .map(s -> s.getGeometry().getCoordinates().length);
+    SortedMap<OSHDBTimestamp, List<Integer>> fullResult = mr.collect();
+    fullResult.values().forEach(Collections::sort);
+
+    List<Double> qs = Arrays.asList(0.0, 0.2, 0.4, 0.6, 0.8, 1.0);
+    SortedMap<OSHDBTimestamp, List<Double>> quantiless = mr.estimatedQuantiles(qs);
+
+    quantiless.forEach((ts, quantiles) -> {
+      for (int i = 0; i < qs.size(); i++) { // pair by position: equal estimates must not be mixed up
+        assertApproximateQuantiles(
+            fullResult.get(ts),
+            qs.get(i),
+            quantiles.get(i)
+        );
+      }
+    });
+  }
+
+  @Test
+  public void testQuantilesFunctionMapAggregator() throws Exception {
+    MapAggregator<OSHDBTimestamp, Integer> mr = this.createMapAggregator()
+        .map(s -> s.getGeometry().getCoordinates().length);
+    SortedMap<OSHDBTimestamp, List<Integer>> fullResult = mr.collect();
+    fullResult.values().forEach(Collections::sort);
+
+    List<Double> qs = Arrays.asList(0.0, 0.2, 0.4, 0.6, 0.8, 1.0);
+    SortedMap<OSHDBTimestamp, DoubleUnaryOperator> quantilesFunctions = mr.estimatedQuantiles();
+
+    quantilesFunctions.forEach((ts, quantilesFunction) -> {
+      for (Double q : qs) {
+        assertApproximateQuantiles(
+            fullResult.get(ts),
+            q,
+            quantilesFunction.applyAsDouble(q)
+        );
+      }
+    });
+  }
+
+}