From 6a320aaf2f027e4a3f141a90468bb5b9fa7a3c82 Mon Sep 17 00:00:00 2001 From: Martin Raifer Date: Thu, 11 Oct 2018 18:32:14 +0200 Subject: [PATCH 1/9] implement calculation of quantiles this uses the t-digest [1] method to estimate quantiles of the distribution of the result. [1] https://raw.githubusercontent.com/tdunning/t-digest/master/docs/t-digest-paper/histo.pdf --- oshdb-api/pom.xml | 6 + .../oshdb/api/mapreducer/MapReducer.java | 119 ++++++++++++++++++ 2 files changed, 125 insertions(+) diff --git a/oshdb-api/pom.xml b/oshdb-api/pom.xml index 513ac6d6e..53ee3c72e 100644 --- a/oshdb-api/pom.xml +++ b/oshdb-api/pom.xml @@ -90,6 +90,12 @@ annotations ${jetbrainsannotations.version} + + + com.tdunning + t-digest + 3.2 + diff --git a/oshdb-api/src/main/java/org/heigit/bigspatialdata/oshdb/api/mapreducer/MapReducer.java b/oshdb-api/src/main/java/org/heigit/bigspatialdata/oshdb/api/mapreducer/MapReducer.java index 38eb5e61c..30510f711 100644 --- a/oshdb-api/src/main/java/org/heigit/bigspatialdata/oshdb/api/mapreducer/MapReducer.java +++ b/oshdb-api/src/main/java/org/heigit/bigspatialdata/oshdb/api/mapreducer/MapReducer.java @@ -1,7 +1,10 @@ package org.heigit.bigspatialdata.oshdb.api.mapreducer; import com.google.common.collect.Iterables; +import com.tdunning.math.stats.MergingDigest; +import com.tdunning.math.stats.TDigest; import java.sql.Connection; +import java.util.stream.Collectors; import java.util.stream.Stream; import org.heigit.bigspatialdata.oshdb.util.celliterator.CellIterator; import org.heigit.bigspatialdata.oshdb.util.exceptions.OSHDBKeytablesNotFoundException; @@ -1136,6 +1139,122 @@ public Double weightedAverage(SerializableFunction mapper) thr return runningSums.num / runningSums.weight; } + /** + * Returns an estimate of the median of the results. 
+ * + * uses the t-digest algorithm to calculate estimates for the quantiles in a map-reduce system: + * https://raw.githubusercontent.com/tdunning/t-digest/master/docs/t-digest-paper/histo.pdf + * + * @return estimated median + */ + @Contract(pure = true) + public Double median() throws Exception { + return this.quantile(0.5); + } + + /** + * Returns an estimate of the median of the results after applying the given map function. + * + * uses the t-digest algorithm to calculate estimates for the quantiles in a map-reduce system: + * https://raw.githubusercontent.com/tdunning/t-digest/master/docs/t-digest-paper/histo.pdf + * + * @param mapper function that returns the numbers to generate the median for + * @return estimated median + */ + @Contract(pure = true) + public Double median(SerializableFunction mapper) throws Exception { + return this.quantile(mapper, 0.5); + } + + /** + * Returns an estimate of a requested quantile of the results. + * + * uses the t-digest algorithm to calculate estimates for the quantiles in a map-reduce system: + * https://raw.githubusercontent.com/tdunning/t-digest/master/docs/t-digest-paper/histo.pdf + * + * @param q the desired quantile to calculate (as a number between 0 and 1) + * @return estimated quantile boundary + */ + @Contract(pure = true) + public Double quantile(double q) throws Exception { + return this.makeNumeric().quantile(n -> n, q); + } + + /** + * Returns an estimate of a requested quantile of the results after applying the given map + * function. 
+ * + * uses the t-digest algorithm to calculate estimates for the quantiles in a map-reduce system: + * https://raw.githubusercontent.com/tdunning/t-digest/master/docs/t-digest-paper/histo.pdf + * + * @param mapper function that returns the numbers to generate the quantile for + * @param q the desired quantile to calculate (as a number between 0 and 1) + * @return estimated quantile boundary + */ + @Contract(pure = true) + public Double quantile(SerializableFunction mapper, double q) + throws Exception { + return this.digest(mapper).quantile(q); + } + + /** + * Returns an estimate of the quantiles of the results + * + * uses the t-digest algorithm to calculate estimates for the quantiles in a map-reduce system: + * https://raw.githubusercontent.com/tdunning/t-digest/master/docs/t-digest-paper/histo.pdf + * + * @param q the desired quantiles to calculate (as a collection of numbers between 0 and 1) + * @return estimated quantile boundaries + */ + @Contract(pure = true) + public Collection quantiles(Collection q) throws Exception { + return this.makeNumeric().quantiles(q); + } + + /** + * Returns an estimate of the quantiles of the results after applying the given map function. + * + * uses the t-digest algorithm to calculate estimates for the quantiles in a map-reduce system: + * https://raw.githubusercontent.com/tdunning/t-digest/master/docs/t-digest-paper/histo.pdf + * + * @param mapper function that returns the numbers to generate the quantiles for + * @param q the desired quantiles to calculate (as a collection of numbers between 0 and 1) + * @return estimated quantile boundaries + */ + @Contract(pure = true) + public Collection quantiles( + SerializableFunction mapper, + Collection q + ) throws Exception { + TDigest digest = this.digest(mapper); + return q.stream().map(digest::quantile).collect(Collectors.toList()); + } + + /** + * generates the t-digest of the complete result set. 
see: + * https://raw.githubusercontent.com/tdunning/t-digest/master/docs/t-digest-paper/histo.pdf + */ + @Contract(pure = true) + private TDigest digest(SerializableFunction mapper) throws Exception { + return this.map(mapper).reduce( + () -> new MergingDigest(1000 /*todo: tweak?*/), + (acc, cur) -> { + acc.add(cur.doubleValue(), 1); + return acc; + }, + (a, b) -> { + if (a.size() == 0) { + return b; + } else if (b.size() == 0) { + return a; + } + MergingDigest r = new MergingDigest(1000); + r.add(Arrays.asList(a, b)); + return r; + } + ); + } + // ----------------------------------------------------------------------------------------------- // "Iterator" like helpers (stream, collect) // ----------------------------------------------------------------------------------------------- From 4f600fa5e22fa8076d728c913390d90c792ceef9 Mon Sep 17 00:00:00 2001 From: Martin Raifer Date: Thu, 18 Oct 2018 15:32:34 +0200 Subject: [PATCH 2/9] add generic quantile generating function Which provides the most flexibility / reusability (e.g. as a map function in a stream pipeline). 
Also change "quantiles of a List of values" method to work on all kinds of iterables --- .../oshdb/api/mapreducer/MapReducer.java | 46 +++++++++++++++++-- 1 file changed, 41 insertions(+), 5 deletions(-) diff --git a/oshdb-api/src/main/java/org/heigit/bigspatialdata/oshdb/api/mapreducer/MapReducer.java b/oshdb-api/src/main/java/org/heigit/bigspatialdata/oshdb/api/mapreducer/MapReducer.java index 30510f711..70e635168 100644 --- a/oshdb-api/src/main/java/org/heigit/bigspatialdata/oshdb/api/mapreducer/MapReducer.java +++ b/oshdb-api/src/main/java/org/heigit/bigspatialdata/oshdb/api/mapreducer/MapReducer.java @@ -4,8 +4,10 @@ import com.tdunning.math.stats.MergingDigest; import com.tdunning.math.stats.TDigest; import java.sql.Connection; +import java.util.function.DoubleUnaryOperator; import java.util.stream.Collectors; import java.util.stream.Stream; +import java.util.stream.StreamSupport; import org.heigit.bigspatialdata.oshdb.util.celliterator.CellIterator; import org.heigit.bigspatialdata.oshdb.util.exceptions.OSHDBKeytablesNotFoundException; import org.heigit.bigspatialdata.oshdb.util.geometry.OSHDBGeometryBuilder; @@ -1207,8 +1209,8 @@ public Double quantile(SerializableFunction mapper, dou * @return estimated quantile boundaries */ @Contract(pure = true) - public Collection quantiles(Collection q) throws Exception { - return this.makeNumeric().quantiles(q); + public List quantiles(Iterable q) throws Exception { + return this.makeNumeric().quantiles(n -> n, q); } /** @@ -1222,12 +1224,46 @@ public Collection quantiles(Collection q) thr * @return estimated quantile boundaries */ @Contract(pure = true) - public Collection quantiles( + public List quantiles( SerializableFunction mapper, - Collection q + Iterable q + ) throws Exception { + return StreamSupport.stream(q.spliterator(), false) + .mapToDouble(Double::doubleValue) + .map(this.quantiles(mapper)) + .boxed() + .collect(Collectors.toList()); + } + + /** + * Returns a function that computes estimates of 
arbitrary quantiles of the results + * + * uses the t-digest algorithm to calculate estimates for the quantiles in a map-reduce system: + * https://raw.githubusercontent.com/tdunning/t-digest/master/docs/t-digest-paper/histo.pdf + * + * @return a function that computes estimated quantile boundaries + */ + @Contract(pure = true) + public DoubleUnaryOperator quantiles() throws Exception { + return this.makeNumeric().quantiles(n -> n); + } + + /** + * Returns a function that computes estimates of arbitrary quantiles of the results after applying + * the given map function. + * + * uses the t-digest algorithm to calculate estimates for the quantiles in a map-reduce system: + * https://raw.githubusercontent.com/tdunning/t-digest/master/docs/t-digest-paper/histo.pdf + * + * @param mapper function that returns the numbers to generate the quantiles for + * @return a function that computes estimated quantile boundaries + */ + @Contract(pure = true) + public DoubleUnaryOperator quantiles( + SerializableFunction mapper ) throws Exception { TDigest digest = this.digest(mapper); - return q.stream().map(digest::quantile).collect(Collectors.toList()); + return digest::quantile; } /** From c0e1ce036210122e7399470d2f293452669e858a Mon Sep 17 00:00:00 2001 From: Martin Raifer Date: Thu, 18 Oct 2018 15:37:58 +0200 Subject: [PATCH 3/9] add tests for MapReducer quantiles methods --- .../oshdb/api/tests/TestQuantiles.java | 127 ++++++++++++++++++ 1 file changed, 127 insertions(+) create mode 100644 oshdb-api/src/test/java/org/heigit/bigspatialdata/oshdb/api/tests/TestQuantiles.java diff --git a/oshdb-api/src/test/java/org/heigit/bigspatialdata/oshdb/api/tests/TestQuantiles.java b/oshdb-api/src/test/java/org/heigit/bigspatialdata/oshdb/api/tests/TestQuantiles.java new file mode 100644 index 000000000..04e691bfe --- /dev/null +++ b/oshdb-api/src/test/java/org/heigit/bigspatialdata/oshdb/api/tests/TestQuantiles.java @@ -0,0 +1,127 @@ +/* + * To change this license header, choose License 
Headers in Project Properties. + * To change this template file, choose Tools | Templates + * and open the template in the editor. + */ +package org.heigit.bigspatialdata.oshdb.api.tests; + +import static org.junit.Assert.assertEquals; + +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.EnumSet; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Set; +import java.util.SortedMap; +import java.util.function.DoubleUnaryOperator; +import java.util.stream.IntStream; +import org.heigit.bigspatialdata.oshdb.api.db.OSHDBDatabase; +import org.heigit.bigspatialdata.oshdb.api.db.OSHDBH2; +import org.heigit.bigspatialdata.oshdb.api.generic.WeightedValue; +import org.heigit.bigspatialdata.oshdb.api.mapreducer.MapReducer; +import org.heigit.bigspatialdata.oshdb.api.mapreducer.OSMContributionView; +import org.heigit.bigspatialdata.oshdb.api.mapreducer.OSMEntitySnapshotView; +import org.heigit.bigspatialdata.oshdb.api.object.OSMContribution; +import org.heigit.bigspatialdata.oshdb.api.object.OSMEntitySnapshot; +import org.heigit.bigspatialdata.oshdb.osm.OSMType; +import org.heigit.bigspatialdata.oshdb.util.OSHDBBoundingBox; +import org.heigit.bigspatialdata.oshdb.util.OSHDBTimestamp; +import org.heigit.bigspatialdata.oshdb.util.celliterator.ContributionType; +import org.heigit.bigspatialdata.oshdb.util.time.OSHDBTimestamps; +import org.junit.Test; + +/** + * + */ +public class TestQuantiles { + private final OSHDBDatabase oshdb; + + private final OSHDBBoundingBox bbox = new OSHDBBoundingBox(8.651133,49.387611,8.6561,49.390513); + private final OSHDBTimestamps timestamps1 = new OSHDBTimestamps("2015-01-01"); + private final OSHDBTimestamps timestamps2 = new OSHDBTimestamps("2014-01-01", "2015-01-01"); + private final OSHDBTimestamps timestamps72 = new OSHDBTimestamps("2010-01-01", "2015-12-01", OSHDBTimestamps.Interval.MONTHLY); + + private final double REQUIRED_ACCURACY = 1E-4; + + 
public TestQuantiles() throws Exception { + oshdb = new OSHDBH2("./src/test/resources/test-data"); + } + + private MapReducer createMapReducer() { + return OSMEntitySnapshotView.on(oshdb) + .timestamps(timestamps1) + .osmType(OSMType.WAY) + .osmTag("building", "yes") + .areaOfInterest(bbox); + } + + private void assertApproximateQuantiles( + List values, double quantile, double result) { + + double quantileIndex = (values.size() - 1) * quantile; + int quantileBoundLower = (int) Math.floor(quantileIndex); + double quantileAmountUpper = quantileIndex - quantileBoundLower; + int quantileBoundUpper = (int) Math.ceil(quantileIndex); + double quantileAmountLower = 1 - quantileAmountUpper; + double expectedResult = ( + quantileAmountLower * values.get(quantileBoundLower).doubleValue() + + quantileAmountUpper * values.get(quantileBoundUpper).doubleValue() + ); + + assertEquals(expectedResult, result, expectedResult * REQUIRED_ACCURACY); + } + + @Test + public void testMedian() throws Exception { + MapReducer mr = this.createMapReducer() + .map(s -> s.getGeometry().getCoordinates().length); + List fullResult = mr.collect(); + Collections.sort(fullResult); + + assertApproximateQuantiles(fullResult, 0.5, mr.median()); + } + + @Test + public void testQuantile() throws Exception { + MapReducer mr = this.createMapReducer() + .map(s -> s.getGeometry().getCoordinates().length); + List fullResult = mr.collect(); + Collections.sort(fullResult); + + assertApproximateQuantiles(fullResult, 0.8, mr.quantile(0.8)); + } + + @Test + public void testQuantiles() throws Exception { + MapReducer mr = this.createMapReducer() + .map(s -> s.getGeometry().getCoordinates().length); + List fullResult = mr.collect(); + Collections.sort(fullResult); + + List qs = Arrays.asList(0.0, 0.2, 0.4, 0.6, 0.8, 1.0); + List quantiles = mr.quantiles(qs); + + for (Double quantile : quantiles) { + assertApproximateQuantiles(fullResult, qs.get(quantiles.indexOf(quantile)), quantile); + } + } + + @Test + public void 
testQuantilesFunction() throws Exception { + MapReducer mr = this.createMapReducer() + .map(s -> s.getGeometry().getCoordinates().length); + List fullResult = mr.collect(); + Collections.sort(fullResult); + + List qs = Arrays.asList(0.0, 0.2, 0.4, 0.6, 0.8, 1.0); + DoubleUnaryOperator quantilesFunction = mr.quantiles(); + + for (Double q : qs) { + assertApproximateQuantiles(fullResult, q, quantilesFunction.applyAsDouble(q)); + } + } + +} From 66fa5b4d5105aa2fd1e41ad6dcf293f6f800f335 Mon Sep 17 00:00:00 2001 From: Martin Raifer Date: Thu, 18 Oct 2018 16:44:06 +0200 Subject: [PATCH 4/9] implement quantiles methods in MapAggregator --- .../oshdb/api/mapreducer/MapAggregator.java | 180 ++++++++++++++++-- .../oshdb/api/mapreducer/MapReducer.java | 44 +++-- 2 files changed, 190 insertions(+), 34 deletions(-) diff --git a/oshdb-api/src/main/java/org/heigit/bigspatialdata/oshdb/api/mapreducer/MapAggregator.java b/oshdb-api/src/main/java/org/heigit/bigspatialdata/oshdb/api/mapreducer/MapAggregator.java index f62e06d45..55d277833 100644 --- a/oshdb-api/src/main/java/org/heigit/bigspatialdata/oshdb/api/mapreducer/MapAggregator.java +++ b/oshdb-api/src/main/java/org/heigit/bigspatialdata/oshdb/api/mapreducer/MapAggregator.java @@ -1,10 +1,14 @@ package org.heigit.bigspatialdata.oshdb.api.mapreducer; import com.google.common.collect.Lists; +import com.tdunning.math.stats.TDigest; import com.vividsolutions.jts.geom.Geometry; import com.vividsolutions.jts.geom.Polygonal; import java.util.Map.Entry; +import java.util.function.DoubleUnaryOperator; +import java.util.function.Function; import java.util.stream.Stream; +import java.util.stream.StreamSupport; import org.apache.commons.lang3.tuple.MutablePair; import org.apache.commons.lang3.tuple.Pair; import org.heigit.bigspatialdata.oshdb.api.generic.*; @@ -19,7 +23,6 @@ import org.heigit.bigspatialdata.oshdb.util.OSHDBTimestamp; import org.heigit.bigspatialdata.oshdb.util.tagtranslator.OSMTag; import 
org.heigit.bigspatialdata.oshdb.util.tagtranslator.OSMTagInterface; -import org.heigit.bigspatialdata.oshdb.util.tagtranslator.OSMTagKey; import org.jetbrains.annotations.Contract; import java.util.*; @@ -416,14 +419,7 @@ public SortedMap> uniq(SerializableFunction mapper) throws E */ @Contract(pure = true) public SortedMap countUniq() throws Exception { - return this - .uniq().entrySet().stream() - .collect(Collectors.toMap( - Map.Entry::getKey, - e -> e.getValue().size(), - (v1, v2) -> v1, // can't happen, actually since input is already a map - TreeMap::new - )); + return transformSortedMap(this.uniq(), Set::size); } /** @@ -463,9 +459,8 @@ public SortedMap average(SerializableFunction weightedAverage(SerializableFunction mapper) throws Exception { - return this - .map(mapper) - .reduce( + return transformSortedMap( + this.map(mapper).reduce( () -> new PayloadWithWeight<>(0.0,0.0), (acc, cur) -> { acc.num = NumberUtils.add(acc.num, cur.getValue().doubleValue()*cur.getWeight()); @@ -473,12 +468,151 @@ public SortedMap weightedAverage(SerializableFunction new PayloadWithWeight<>(NumberUtils.add(a.num, b.num), a.weight+b.weight) - ).entrySet().stream().collect(Collectors.toMap( - Map.Entry::getKey, - e -> e.getValue().num / e.getValue().weight, - (v1, v2) -> v1, - TreeMap::new - )); + ), + x -> x.num / x.weight + ); + } + + /** + * Returns an estimate of the median of the results. + * + * uses the t-digest algorithm to calculate estimates for the quantiles in a map-reduce system: + * https://raw.githubusercontent.com/tdunning/t-digest/master/docs/t-digest-paper/histo.pdf + * + * @return estimated median + */ + @Contract(pure = true) + public SortedMap median() throws Exception { + return this.quantile(0.5); + } + + /** + * Returns an estimate of the median of the results after applying the given map function. 
+ * + * uses the t-digest algorithm to calculate estimates for the quantiles in a map-reduce system: + * https://raw.githubusercontent.com/tdunning/t-digest/master/docs/t-digest-paper/histo.pdf + * + * @param mapper function that returns the numbers to generate the median for + * @return estimated median + */ + @Contract(pure = true) + public SortedMap median(SerializableFunction mapper) throws Exception { + return this.quantile(mapper, 0.5); + } + + /** + * Returns an estimate of a requested quantile of the results. + * + * uses the t-digest algorithm to calculate estimates for the quantiles in a map-reduce system: + * https://raw.githubusercontent.com/tdunning/t-digest/master/docs/t-digest-paper/histo.pdf + * + * @param q the desired quantile to calculate (as a number between 0 and 1) + * @return estimated quantile boundary + */ + @Contract(pure = true) + public SortedMap quantile(double q) throws Exception { + return this.makeNumeric().quantile(n -> n, q); + } + + /** + * Returns an estimate of a requested quantile of the results after applying the given map + * function. 
+ * + * uses the t-digest algorithm to calculate estimates for the quantiles in a map-reduce system: + * https://raw.githubusercontent.com/tdunning/t-digest/master/docs/t-digest-paper/histo.pdf + * + * @param mapper function that returns the numbers to generate the quantile for + * @param q the desired quantile to calculate (as a number between 0 and 1) + * @return estimated quantile boundary + */ + @Contract(pure = true) + public SortedMap quantile( + SerializableFunction mapper, + double q + ) throws Exception { + return transformSortedMap(this.quantiles(mapper), qFunction -> qFunction.applyAsDouble(q)); + } + + /** + * Returns an estimate of the quantiles of the results + * + * uses the t-digest algorithm to calculate estimates for the quantiles in a map-reduce system: + * https://raw.githubusercontent.com/tdunning/t-digest/master/docs/t-digest-paper/histo.pdf + * + * @param q the desired quantiles to calculate (as a collection of numbers between 0 and 1) + * @return estimated quantile boundaries + */ + @Contract(pure = true) + public SortedMap> quantiles(Iterable q) throws Exception { + return this.makeNumeric().quantiles(n -> n, q); + } + + /** + * Returns an estimate of the quantiles of the results after applying the given map function. 
+ * + * uses the t-digest algorithm to calculate estimates for the quantiles in a map-reduce system: + * https://raw.githubusercontent.com/tdunning/t-digest/master/docs/t-digest-paper/histo.pdf + * + * @param mapper function that returns the numbers to generate the quantiles for + * @param q the desired quantiles to calculate (as a collection of numbers between 0 and 1) + * @return estimated quantile boundaries + */ + @Contract(pure = true) + public SortedMap> quantiles( + SerializableFunction mapper, + Iterable q + ) throws Exception { + return transformSortedMap( + this.quantiles(mapper), + quantileFunction -> StreamSupport.stream(q.spliterator(), false) + .mapToDouble(Double::doubleValue) + .map(quantileFunction) + .boxed() + .collect(Collectors.toList()) + ); + } + + /** + * Returns a function that computes estimates of arbitrary quantiles of the results + * + * uses the t-digest algorithm to calculate estimates for the quantiles in a map-reduce system: + * https://raw.githubusercontent.com/tdunning/t-digest/master/docs/t-digest-paper/histo.pdf + * + * @return a function that computes estimated quantile boundaries + */ + @Contract(pure = true) + public SortedMap quantiles() throws Exception { + return this.makeNumeric().quantiles(n -> n); + } + + /** + * Returns a function that computes estimates of arbitrary quantiles of the results after applying + * the given map function. + * + * uses the t-digest algorithm to calculate estimates for the quantiles in a map-reduce system: + * https://raw.githubusercontent.com/tdunning/t-digest/master/docs/t-digest-paper/histo.pdf + * + * @param mapper function that returns the numbers to generate the quantiles for + * @return a function that computes estimated quantile boundaries + */ + @Contract(pure = true) + public SortedMap quantiles( + SerializableFunction mapper + ) throws Exception { + return transformSortedMap(this.digest(mapper), d -> d::quantile); + } + + /** + * generates the t-digest of the complete result set. 
see: + * https://raw.githubusercontent.com/tdunning/t-digest/master/docs/t-digest-paper/histo.pdf + */ + @Contract(pure = true) + private SortedMap digest(SerializableFunction mapper) throws Exception { + return this.map(mapper).reduce( + TDigestReducer::identitySupplier, + TDigestReducer::accumulator, + TDigestReducer::combiner + ); } // ----------------------------------------------------------------------------------------------- @@ -732,4 +866,14 @@ private Collection _completeZerofill(Set keys, List> zerofil ).collect(Collectors.toList()); } } + + // transforms the values of a sorted map by a given function (similar to Stream::map) + private SortedMap transformSortedMap(SortedMap in, Function transform) { + return in.entrySet().stream().collect(Collectors.toMap( + Entry::getKey, + e -> transform.apply(e.getValue()), + (v1, v2) -> { assert false; return v1; }, + TreeMap::new + )); + } } diff --git a/oshdb-api/src/main/java/org/heigit/bigspatialdata/oshdb/api/mapreducer/MapReducer.java b/oshdb-api/src/main/java/org/heigit/bigspatialdata/oshdb/api/mapreducer/MapReducer.java index 70e635168..eeaf65421 100644 --- a/oshdb-api/src/main/java/org/heigit/bigspatialdata/oshdb/api/mapreducer/MapReducer.java +++ b/oshdb-api/src/main/java/org/heigit/bigspatialdata/oshdb/api/mapreducer/MapReducer.java @@ -1196,7 +1196,7 @@ public Double quantile(double q) throws Exception { @Contract(pure = true) public Double quantile(SerializableFunction mapper, double q) throws Exception { - return this.digest(mapper).quantile(q); + return this.quantiles(mapper).applyAsDouble(q); } /** @@ -1273,21 +1273,9 @@ public DoubleUnaryOperator quantiles( @Contract(pure = true) private TDigest digest(SerializableFunction mapper) throws Exception { return this.map(mapper).reduce( - () -> new MergingDigest(1000 /*todo: tweak?*/), - (acc, cur) -> { - acc.add(cur.doubleValue(), 1); - return acc; - }, - (a, b) -> { - if (a.size() == 0) { - return b; - } else if (b.size() == 0) { - return a; - } - 
MergingDigest r = new MergingDigest(1000); - r.add(Arrays.asList(a, b)); - return r; - } + TDigestReducer::identitySupplier, + TDigestReducer::accumulator, + TDigestReducer::combiner ); } @@ -1802,3 +1790,27 @@ public Object apply(Object o) { return this.mapper.apply(o); } } + +class TDigestReducer /*implements Serializable*/ { + private final static int COMPRESSION = 1000; // todo: tweak? + + static TDigest identitySupplier() { + return new MergingDigest(COMPRESSION); + } + + static TDigest accumulator(TDigest acc, R cur) { + acc.add(cur.doubleValue(), 1); + return acc; + } + + static TDigest combiner(TDigest a, TDigest b) { + if (a.size() == 0) { + return b; + } else if (b.size() == 0) { + return a; + } + MergingDigest r = new MergingDigest(COMPRESSION); + r.add(Arrays.asList(a, b)); + return r; + } +} \ No newline at end of file From 24672c1d35c4e455a390ca96abf313bc56996a5b Mon Sep 17 00:00:00 2001 From: Martin Raifer Date: Thu, 18 Oct 2018 17:46:01 +0200 Subject: [PATCH 5/9] throw keytables-not-found exception if none was set --- .../heigit/bigspatialdata/oshdb/api/mapreducer/MapReducer.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/oshdb-api/src/main/java/org/heigit/bigspatialdata/oshdb/api/mapreducer/MapReducer.java b/oshdb-api/src/main/java/org/heigit/bigspatialdata/oshdb/api/mapreducer/MapReducer.java index eeaf65421..97a65dbce 100644 --- a/oshdb-api/src/main/java/org/heigit/bigspatialdata/oshdb/api/mapreducer/MapReducer.java +++ b/oshdb-api/src/main/java/org/heigit/bigspatialdata/oshdb/api/mapreducer/MapReducer.java @@ -1630,6 +1630,9 @@ protected TagInterpreter _getTagInterpreter() throws ParseException, SQLExceptio protected TagTranslator _getTagTranslator() { if (this._tagTranslator == null) { try { + if (this._oshdbForTags == null) { + throw new OSHDBKeytablesNotFoundException(); + } this._tagTranslator = new TagTranslator(this._oshdbForTags.getConnection()); } catch (OSHDBKeytablesNotFoundException e) { LOG.error(e.getMessage()); From 
c38d6f7414dbf97cdc14f0230122583302415a59 Mon Sep 17 00:00:00 2001 From: Martin Raifer Date: Thu, 18 Oct 2018 17:47:12 +0200 Subject: [PATCH 6/9] add tests for MapAggregator quantile functions --- .../oshdb/api/tests/TestQuantiles.java | 111 +++++++++++++++--- 1 file changed, 92 insertions(+), 19 deletions(-) diff --git a/oshdb-api/src/test/java/org/heigit/bigspatialdata/oshdb/api/tests/TestQuantiles.java b/oshdb-api/src/test/java/org/heigit/bigspatialdata/oshdb/api/tests/TestQuantiles.java index 04e691bfe..8a6dc0e00 100644 --- a/oshdb-api/src/test/java/org/heigit/bigspatialdata/oshdb/api/tests/TestQuantiles.java +++ b/oshdb-api/src/test/java/org/heigit/bigspatialdata/oshdb/api/tests/TestQuantiles.java @@ -8,28 +8,19 @@ import static org.junit.Assert.assertEquals; import java.util.Arrays; -import java.util.Collection; import java.util.Collections; -import java.util.EnumSet; -import java.util.HashSet; -import java.util.Iterator; import java.util.List; -import java.util.Set; import java.util.SortedMap; import java.util.function.DoubleUnaryOperator; -import java.util.stream.IntStream; import org.heigit.bigspatialdata.oshdb.api.db.OSHDBDatabase; import org.heigit.bigspatialdata.oshdb.api.db.OSHDBH2; -import org.heigit.bigspatialdata.oshdb.api.generic.WeightedValue; +import org.heigit.bigspatialdata.oshdb.api.mapreducer.MapAggregator; import org.heigit.bigspatialdata.oshdb.api.mapreducer.MapReducer; -import org.heigit.bigspatialdata.oshdb.api.mapreducer.OSMContributionView; import org.heigit.bigspatialdata.oshdb.api.mapreducer.OSMEntitySnapshotView; -import org.heigit.bigspatialdata.oshdb.api.object.OSMContribution; import org.heigit.bigspatialdata.oshdb.api.object.OSMEntitySnapshot; import org.heigit.bigspatialdata.oshdb.osm.OSMType; import org.heigit.bigspatialdata.oshdb.util.OSHDBBoundingBox; import org.heigit.bigspatialdata.oshdb.util.OSHDBTimestamp; -import org.heigit.bigspatialdata.oshdb.util.celliterator.ContributionType; import 
org.heigit.bigspatialdata.oshdb.util.time.OSHDBTimestamps; import org.junit.Test; @@ -42,7 +33,6 @@ public class TestQuantiles { private final OSHDBBoundingBox bbox = new OSHDBBoundingBox(8.651133,49.387611,8.6561,49.390513); private final OSHDBTimestamps timestamps1 = new OSHDBTimestamps("2015-01-01"); private final OSHDBTimestamps timestamps2 = new OSHDBTimestamps("2014-01-01", "2015-01-01"); - private final OSHDBTimestamps timestamps72 = new OSHDBTimestamps("2010-01-01", "2015-12-01", OSHDBTimestamps.Interval.MONTHLY); private final double REQUIRED_ACCURACY = 1E-4; @@ -50,14 +40,6 @@ public TestQuantiles() throws Exception { oshdb = new OSHDBH2("./src/test/resources/test-data"); } - private MapReducer createMapReducer() { - return OSMEntitySnapshotView.on(oshdb) - .timestamps(timestamps1) - .osmType(OSMType.WAY) - .osmTag("building", "yes") - .areaOfInterest(bbox); - } - private void assertApproximateQuantiles( List values, double quantile, double result) { @@ -74,6 +56,16 @@ private void assertApproximateQuantiles( assertEquals(expectedResult, result, expectedResult * REQUIRED_ACCURACY); } + // MapReducer + + private MapReducer createMapReducer() { + return OSMEntitySnapshotView.on(oshdb) + .timestamps(timestamps1) + .osmType(OSMType.WAY) + .osmTag("building", "yes") + .areaOfInterest(bbox); + } + @Test public void testMedian() throws Exception { MapReducer mr = this.createMapReducer() @@ -124,4 +116,85 @@ public void testQuantilesFunction() throws Exception { } } + // MapAggregator + + private MapAggregator createMapAggregator() { + return OSMEntitySnapshotView.on(oshdb) + .timestamps(timestamps2) + .osmType(OSMType.WAY) + .osmTag("building", "yes") + .areaOfInterest(bbox) + .aggregateByTimestamp(); + } + + @Test + public void testMedianMapAggregator() throws Exception { + MapAggregator mr = this.createMapAggregator() + .map(s -> s.getGeometry().getCoordinates().length); + SortedMap> fullResult = mr.collect(); + fullResult.values().forEach(Collections::sort); 
+ + SortedMap medians = mr.median(); + + medians.forEach((ts, median) -> + assertApproximateQuantiles(fullResult.get(ts), 0.5, median) + ); + } + + @Test + public void testQuantileMapAggregator() throws Exception { + MapAggregator mr = this.createMapAggregator() + .map(s -> s.getGeometry().getCoordinates().length); + SortedMap> fullResult = mr.collect(); + fullResult.values().forEach(Collections::sort); + + SortedMap quantiles = mr.quantile(0.8); + + quantiles.forEach((ts, quantile) -> + assertApproximateQuantiles(fullResult.get(ts), 0.8, quantile) + ); + } + + @Test + public void testQuantilesMapAggregator() throws Exception { + MapAggregator mr = this.createMapAggregator() + .map(s -> s.getGeometry().getCoordinates().length); + SortedMap> fullResult = mr.collect(); + fullResult.values().forEach(Collections::sort); + + List qs = Arrays.asList(0.0, 0.2, 0.4, 0.6, 0.8, 1.0); + SortedMap> quantiless = mr.quantiles(qs); + + quantiless.forEach((ts, quantiles) -> { + for (Double quantile : quantiles) { + assertApproximateQuantiles( + fullResult.get(ts), + qs.get(quantiles.indexOf(quantile)), + quantile + ); + } + }); + } + + @Test + public void testQuantilesFunctionMapAggregator() throws Exception { + MapAggregator mr = this.createMapAggregator() + .map(s -> s.getGeometry().getCoordinates().length); + SortedMap> fullResult = mr.collect(); + fullResult.values().forEach(Collections::sort); + + List qs = Arrays.asList(0.0, 0.2, 0.4, 0.6, 0.8, 1.0); + SortedMap quantilesFunctions = mr.quantiles(); + + quantilesFunctions.forEach((ts, quantilesFunction) -> { + for (Double q : qs) { + assertApproximateQuantiles( + fullResult.get(ts), + q, + quantilesFunction.applyAsDouble(q) + ); + } + }); + } + } From be0bb292f46b0fa937a7f8b1a0a6a09a229421ee Mon Sep 17 00:00:00 2001 From: Martin Raifer Date: Fri, 19 Oct 2018 10:36:32 +0200 Subject: [PATCH 7/9] tDigest compression factor of 1000 should be ok --- .../oshdb/api/mapreducer/MapReducer.java | 13 +++++++++++-- 1 file changed, 
11 insertions(+), 2 deletions(-) diff --git a/oshdb-api/src/main/java/org/heigit/bigspatialdata/oshdb/api/mapreducer/MapReducer.java b/oshdb-api/src/main/java/org/heigit/bigspatialdata/oshdb/api/mapreducer/MapReducer.java index 97a65dbce..0813361b9 100644 --- a/oshdb-api/src/main/java/org/heigit/bigspatialdata/oshdb/api/mapreducer/MapReducer.java +++ b/oshdb-api/src/main/java/org/heigit/bigspatialdata/oshdb/api/mapreducer/MapReducer.java @@ -1794,8 +1794,17 @@ public Object apply(Object o) { } } -class TDigestReducer /*implements Serializable*/ { - private final static int COMPRESSION = 1000; // todo: tweak? +class TDigestReducer { + + /** + * a COMPRESSION parameter of 1000 should provide relatively precise results, while not being + * too demanding on memory usage. See page 20 in the paper [1]: + * + * > Compression parameter (1/δ) was […] 1000 in order to reliably achieve 0.1% accuracy + * + * [1] https://raw.githubusercontent.com/tdunning/t-digest/master/docs/t-digest-paper/histo.pdf + */ + private final static int COMPRESSION = 1000; static TDigest identitySupplier() { return new MergingDigest(COMPRESSION); From 1194f14592ceaf9706241263abc0bd5310f0c7ba Mon Sep 17 00:00:00 2001 From: Martin Raifer Date: Fri, 19 Oct 2018 11:29:22 +0200 Subject: [PATCH 8/9] add new quantile methods to shared MR/MA interface --- .../mapreducer/MapReducerAggregations.java | 100 ++++++++++++++++++ 1 file changed, 100 insertions(+) diff --git a/oshdb-api/src/main/java/org/heigit/bigspatialdata/oshdb/api/mapreducer/MapReducerAggregations.java b/oshdb-api/src/main/java/org/heigit/bigspatialdata/oshdb/api/mapreducer/MapReducerAggregations.java index 17254ae8a..93082872b 100644 --- a/oshdb-api/src/main/java/org/heigit/bigspatialdata/oshdb/api/mapreducer/MapReducerAggregations.java +++ b/oshdb-api/src/main/java/org/heigit/bigspatialdata/oshdb/api/mapreducer/MapReducerAggregations.java @@ -144,10 +144,110 @@ interface MapReducerAggregations { */ Object weightedAverage(SerializableFunction 
mapper) throws Exception; + /** + * Returns an estimate of the median of the results. + * + * uses the t-digest algorithm to calculate estimates for the quantiles in a map-reduce system: + * https://raw.githubusercontent.com/tdunning/t-digest/master/docs/t-digest-paper/histo.pdf + * + * @return estimated median + */ + Object median() throws Exception; + + /** + * Returns an estimate of the median of the results after applying the given map function. + * + * uses the t-digest algorithm to calculate estimates for the quantiles in a map-reduce system: + * https://raw.githubusercontent.com/tdunning/t-digest/master/docs/t-digest-paper/histo.pdf + * + * @param mapper function that returns the numbers to generate the median for + * @return estimated median + */ + Object median(SerializableFunction mapper) throws Exception; + + /** + * Returns an estimate of a requested quantile of the results. + * + * uses the t-digest algorithm to calculate estimates for the quantiles in a map-reduce system: + * https://raw.githubusercontent.com/tdunning/t-digest/master/docs/t-digest-paper/histo.pdf + * + * @param q the desired quantile to calculate (as a number between 0 and 1) + * @return estimated quantile boundary + */ + Object quantile(double q) throws Exception; + + /** + * Returns an estimate of a requested quantile of the results after applying the given map + * function. 
+ * + * uses the t-digest algorithm to calculate estimates for the quantiles in a map-reduce system: + * https://raw.githubusercontent.com/tdunning/t-digest/master/docs/t-digest-paper/histo.pdf + * + * @param mapper function that returns the numbers to generate the quantile for + * @param q the desired quantile to calculate (as a number between 0 and 1) + * @return estimated quantile boundary + */ + Object quantile(SerializableFunction mapper, double q) throws Exception; + + /** + * Returns an estimate of the quantiles of the results + * + * uses the t-digest algorithm to calculate estimates for the quantiles in a map-reduce system: + * https://raw.githubusercontent.com/tdunning/t-digest/master/docs/t-digest-paper/histo.pdf + * + * @param q the desired quantiles to calculate (as a collection of numbers between 0 and 1) + * @return estimated quantile boundaries + */ + Object quantiles(Iterable q) throws Exception; + + /** + * Returns an estimate of the quantiles of the results after applying the given map function. 
+ * + * uses the t-digest algorithm to calculate estimates for the quantiles in a map-reduce system: + * https://raw.githubusercontent.com/tdunning/t-digest/master/docs/t-digest-paper/histo.pdf + * + * @param mapper function that returns the numbers to generate the quantiles for + * @param q the desired quantiles to calculate (as a collection of numbers between 0 and 1) + * @return estimated quantile boundaries + */ + Object quantiles( + SerializableFunction mapper, + Iterable q + ) throws Exception; + + /** + * Returns a function that computes estimates of arbitrary quantiles of the results + * + * uses the t-digest algorithm to calculate estimates for the quantiles in a map-reduce system: + * https://raw.githubusercontent.com/tdunning/t-digest/master/docs/t-digest-paper/histo.pdf + * + * @return a function that computes estimated quantile boundaries + */ + Object quantiles() throws Exception; + + /** + * Returns a function that computes estimates of arbitrary quantiles of the results after applying + * the given map function. 
+ * + * uses the t-digest algorithm to calculate estimates for the quantiles in a map-reduce system: + * https://raw.githubusercontent.com/tdunning/t-digest/master/docs/t-digest-paper/histo.pdf + * + * @param mapper function that returns the numbers to generate the quantiles for + * @return a function that computes estimated quantile boundaries + */ + Object quantiles(SerializableFunction mapper) throws Exception; + /** * Collects all results into List(s) * * @return list(s) with all results returned by the `mapper` function */ Object collect() throws Exception; + + /** + * Returns all results as a Stream + * + * @return a stream with all results returned by the `mapper` function + */ + Object stream() throws Exception; } From 741b6c880be133d4798476cdeaf3daea9faeda91 Mon Sep 17 00:00:00 2001 From: Martin Raifer Date: Mon, 29 Oct 2018 17:24:57 +0100 Subject: [PATCH 9/9] rename methods to reflect that they return estimations --- .../oshdb/api/mapreducer/MapAggregator.java | 30 +++++++++---------- .../oshdb/api/mapreducer/MapReducer.java | 30 +++++++++---------- .../mapreducer/MapReducerAggregations.java | 16 +++++----- .../oshdb/api/tests/TestQuantiles.java | 16 +++++----- 4 files changed, 46 insertions(+), 46 deletions(-) diff --git a/oshdb-api/src/main/java/org/heigit/bigspatialdata/oshdb/api/mapreducer/MapAggregator.java b/oshdb-api/src/main/java/org/heigit/bigspatialdata/oshdb/api/mapreducer/MapAggregator.java index 55d277833..38e364730 100644 --- a/oshdb-api/src/main/java/org/heigit/bigspatialdata/oshdb/api/mapreducer/MapAggregator.java +++ b/oshdb-api/src/main/java/org/heigit/bigspatialdata/oshdb/api/mapreducer/MapAggregator.java @@ -482,8 +482,8 @@ public SortedMap weightedAverage(SerializableFunction median() throws Exception { - return this.quantile(0.5); + public SortedMap estimatedMedian() throws Exception { + return this.estimatedQuantile(0.5); } /** @@ -496,8 +496,8 @@ public SortedMap median() throws Exception { * @return estimated median */ 
@Contract(pure = true) - public SortedMap median(SerializableFunction mapper) throws Exception { - return this.quantile(mapper, 0.5); + public SortedMap estimatedMedian(SerializableFunction mapper) throws Exception { + return this.estimatedQuantile(mapper, 0.5); } /** @@ -510,8 +510,8 @@ public SortedMap median(SerializableFunction * @return estimated quantile boundary */ @Contract(pure = true) - public SortedMap quantile(double q) throws Exception { - return this.makeNumeric().quantile(n -> n, q); + public SortedMap estimatedQuantile(double q) throws Exception { + return this.makeNumeric().estimatedQuantile(n -> n, q); } /** @@ -526,11 +526,11 @@ public SortedMap quantile(double q) throws Exception { * @return estimated quantile boundary */ @Contract(pure = true) - public SortedMap quantile( + public SortedMap estimatedQuantile( SerializableFunction mapper, double q ) throws Exception { - return transformSortedMap(this.quantiles(mapper), qFunction -> qFunction.applyAsDouble(q)); + return transformSortedMap(this.estimatedQuantiles(mapper), qFunction -> qFunction.applyAsDouble(q)); } /** @@ -543,8 +543,8 @@ public SortedMap quantile( * @return estimated quantile boundaries */ @Contract(pure = true) - public SortedMap> quantiles(Iterable q) throws Exception { - return this.makeNumeric().quantiles(n -> n, q); + public SortedMap> estimatedQuantiles(Iterable q) throws Exception { + return this.makeNumeric().estimatedQuantiles(n -> n, q); } /** @@ -558,12 +558,12 @@ public SortedMap> quantiles(Iterable q) throws Exception * @return estimated quantile boundaries */ @Contract(pure = true) - public SortedMap> quantiles( + public SortedMap> estimatedQuantiles( SerializableFunction mapper, Iterable q ) throws Exception { return transformSortedMap( - this.quantiles(mapper), + this.estimatedQuantiles(mapper), quantileFunction -> StreamSupport.stream(q.spliterator(), false) .mapToDouble(Double::doubleValue) .map(quantileFunction) @@ -581,8 +581,8 @@ public SortedMap> quantiles( 
* @return a function that computes estimated quantile boundaries */ @Contract(pure = true) - public SortedMap quantiles() throws Exception { - return this.makeNumeric().quantiles(n -> n); + public SortedMap estimatedQuantiles() throws Exception { + return this.makeNumeric().estimatedQuantiles(n -> n); } /** @@ -596,7 +596,7 @@ public SortedMap quantiles() throws Exception { * @return a function that computes estimated quantile boundaries */ @Contract(pure = true) - public SortedMap quantiles( + public SortedMap estimatedQuantiles( SerializableFunction mapper ) throws Exception { return transformSortedMap(this.digest(mapper), d -> d::quantile); diff --git a/oshdb-api/src/main/java/org/heigit/bigspatialdata/oshdb/api/mapreducer/MapReducer.java b/oshdb-api/src/main/java/org/heigit/bigspatialdata/oshdb/api/mapreducer/MapReducer.java index 0813361b9..afd5f3fad 100644 --- a/oshdb-api/src/main/java/org/heigit/bigspatialdata/oshdb/api/mapreducer/MapReducer.java +++ b/oshdb-api/src/main/java/org/heigit/bigspatialdata/oshdb/api/mapreducer/MapReducer.java @@ -1150,8 +1150,8 @@ public Double weightedAverage(SerializableFunction mapper) thr * @return estimated median */ @Contract(pure = true) - public Double median() throws Exception { - return this.quantile(0.5); + public Double estimatedMedian() throws Exception { + return this.estimatedQuantile(0.5); } /** @@ -1164,8 +1164,8 @@ public Double median() throws Exception { * @return estimated median */ @Contract(pure = true) - public Double median(SerializableFunction mapper) throws Exception { - return this.quantile(mapper, 0.5); + public Double estimatedMedian(SerializableFunction mapper) throws Exception { + return this.estimatedQuantile(mapper, 0.5); } /** @@ -1178,8 +1178,8 @@ public Double median(SerializableFunction mapper) throw * @return estimated quantile boundary */ @Contract(pure = true) - public Double quantile(double q) throws Exception { - return this.makeNumeric().quantile(n -> n, q); + public Double 
estimatedQuantile(double q) throws Exception { + return this.makeNumeric().estimatedQuantile(n -> n, q); } /** @@ -1194,9 +1194,9 @@ public Double quantile(double q) throws Exception { * @return estimated quantile boundary */ @Contract(pure = true) - public Double quantile(SerializableFunction mapper, double q) + public Double estimatedQuantile(SerializableFunction mapper, double q) throws Exception { - return this.quantiles(mapper).applyAsDouble(q); + return this.estimatedQuantiles(mapper).applyAsDouble(q); } /** @@ -1209,8 +1209,8 @@ public Double quantile(SerializableFunction mapper, dou * @return estimated quantile boundaries */ @Contract(pure = true) - public List quantiles(Iterable q) throws Exception { - return this.makeNumeric().quantiles(n -> n, q); + public List estimatedQuantiles(Iterable q) throws Exception { + return this.makeNumeric().estimatedQuantiles(n -> n, q); } /** @@ -1224,13 +1224,13 @@ public List quantiles(Iterable q) throws Exception { * @return estimated quantile boundaries */ @Contract(pure = true) - public List quantiles( + public List estimatedQuantiles( SerializableFunction mapper, Iterable q ) throws Exception { return StreamSupport.stream(q.spliterator(), false) .mapToDouble(Double::doubleValue) - .map(this.quantiles(mapper)) + .map(this.estimatedQuantiles(mapper)) .boxed() .collect(Collectors.toList()); } @@ -1244,8 +1244,8 @@ public List quantiles( * @return a function that computes estimated quantile boundaries */ @Contract(pure = true) - public DoubleUnaryOperator quantiles() throws Exception { - return this.makeNumeric().quantiles(n -> n); + public DoubleUnaryOperator estimatedQuantiles() throws Exception { + return this.makeNumeric().estimatedQuantiles(n -> n); } /** @@ -1259,7 +1259,7 @@ public DoubleUnaryOperator quantiles() throws Exception { * @return a function that computes estimated quantile boundaries */ @Contract(pure = true) - public DoubleUnaryOperator quantiles( + public DoubleUnaryOperator estimatedQuantiles( 
SerializableFunction mapper ) throws Exception { TDigest digest = this.digest(mapper); diff --git a/oshdb-api/src/main/java/org/heigit/bigspatialdata/oshdb/api/mapreducer/MapReducerAggregations.java b/oshdb-api/src/main/java/org/heigit/bigspatialdata/oshdb/api/mapreducer/MapReducerAggregations.java index 93082872b..2dbcc378f 100644 --- a/oshdb-api/src/main/java/org/heigit/bigspatialdata/oshdb/api/mapreducer/MapReducerAggregations.java +++ b/oshdb-api/src/main/java/org/heigit/bigspatialdata/oshdb/api/mapreducer/MapReducerAggregations.java @@ -152,7 +152,7 @@ interface MapReducerAggregations { * * @return estimated median */ - Object median() throws Exception; + Object estimatedMedian() throws Exception; /** * Returns an estimate of the median of the results after applying the given map function. @@ -163,7 +163,7 @@ interface MapReducerAggregations { * @param mapper function that returns the numbers to generate the median for * @return estimated median */ - Object median(SerializableFunction mapper) throws Exception; + Object estimatedMedian(SerializableFunction mapper) throws Exception; /** * Returns an estimate of a requested quantile of the results. 
@@ -174,7 +174,7 @@ interface MapReducerAggregations { * @param q the desired quantile to calculate (as a number between 0 and 1) * @return estimated quantile boundary */ - Object quantile(double q) throws Exception; + Object estimatedQuantile(double q) throws Exception; /** * Returns an estimate of a requested quantile of the results after applying the given map @@ -187,7 +187,7 @@ interface MapReducerAggregations { * @param q the desired quantile to calculate (as a number between 0 and 1) * @return estimated quantile boundary */ - Object quantile(SerializableFunction mapper, double q) throws Exception; + Object estimatedQuantile(SerializableFunction mapper, double q) throws Exception; /** * Returns an estimate of the quantiles of the results @@ -198,7 +198,7 @@ interface MapReducerAggregations { * @param q the desired quantiles to calculate (as a collection of numbers between 0 and 1) * @return estimated quantile boundaries */ - Object quantiles(Iterable q) throws Exception; + Object estimatedQuantiles(Iterable q) throws Exception; /** * Returns an estimate of the quantiles of the results after applying the given map function. 
@@ -210,7 +210,7 @@ interface MapReducerAggregations { * @param q the desired quantiles to calculate (as a collection of numbers between 0 and 1) * @return estimated quantile boundaries */ - Object quantiles( + Object estimatedQuantiles( SerializableFunction mapper, Iterable q ) throws Exception; @@ -223,7 +223,7 @@ Object quantiles( * * @return a function that computes estimated quantile boundaries */ - Object quantiles() throws Exception; + Object estimatedQuantiles() throws Exception; /** * Returns a function that computes estimates of arbitrary quantiles of the results after applying @@ -235,7 +235,7 @@ Object quantiles( * @param mapper function that returns the numbers to generate the quantiles for * @return a function that computes estimated quantile boundaries */ - Object quantiles(SerializableFunction mapper) throws Exception; + Object estimatedQuantiles(SerializableFunction mapper) throws Exception; /** * Collects all results into List(s) diff --git a/oshdb-api/src/test/java/org/heigit/bigspatialdata/oshdb/api/tests/TestQuantiles.java b/oshdb-api/src/test/java/org/heigit/bigspatialdata/oshdb/api/tests/TestQuantiles.java index 8a6dc0e00..42a6ecede 100644 --- a/oshdb-api/src/test/java/org/heigit/bigspatialdata/oshdb/api/tests/TestQuantiles.java +++ b/oshdb-api/src/test/java/org/heigit/bigspatialdata/oshdb/api/tests/TestQuantiles.java @@ -73,7 +73,7 @@ public void testMedian() throws Exception { List fullResult = mr.collect(); Collections.sort(fullResult); - assertApproximateQuantiles(fullResult, 0.5, mr.median()); + assertApproximateQuantiles(fullResult, 0.5, mr.estimatedMedian()); } @Test @@ -83,7 +83,7 @@ public void testQuantile() throws Exception { List fullResult = mr.collect(); Collections.sort(fullResult); - assertApproximateQuantiles(fullResult, 0.8, mr.quantile(0.8)); + assertApproximateQuantiles(fullResult, 0.8, mr.estimatedQuantile(0.8)); } @Test @@ -94,7 +94,7 @@ public void testQuantiles() throws Exception { Collections.sort(fullResult); List qs 
= Arrays.asList(0.0, 0.2, 0.4, 0.6, 0.8, 1.0); - List quantiles = mr.quantiles(qs); + List quantiles = mr.estimatedQuantiles(qs); for (Double quantile : quantiles) { assertApproximateQuantiles(fullResult, qs.get(quantiles.indexOf(quantile)), quantile); @@ -109,7 +109,7 @@ public void testQuantilesFunction() throws Exception { Collections.sort(fullResult); List qs = Arrays.asList(0.0, 0.2, 0.4, 0.6, 0.8, 1.0); - DoubleUnaryOperator quantilesFunction = mr.quantiles(); + DoubleUnaryOperator quantilesFunction = mr.estimatedQuantiles(); for (Double q : qs) { assertApproximateQuantiles(fullResult, q, quantilesFunction.applyAsDouble(q)); @@ -134,7 +134,7 @@ public void testMedianMapAggregator() throws Exception { SortedMap> fullResult = mr.collect(); fullResult.values().forEach(Collections::sort); - SortedMap medians = mr.quantile(0.8); + SortedMap medians = mr.estimatedQuantile(0.8); medians.forEach((ts, median) -> assertApproximateQuantiles(fullResult.get(ts), 0.8, median) @@ -148,7 +148,7 @@ public void testQuantileMapAggregator() throws Exception { SortedMap> fullResult = mr.collect(); fullResult.values().forEach(Collections::sort); - SortedMap quantiles = mr.quantile(0.8); + SortedMap quantiles = mr.estimatedQuantile(0.8); quantiles.forEach((ts, quantile) -> assertApproximateQuantiles(fullResult.get(ts), 0.8, quantile) @@ -163,7 +163,7 @@ public void testQuantilesMapAggregator() throws Exception { fullResult.values().forEach(Collections::sort); List qs = Arrays.asList(0.0, 0.2, 0.4, 0.6, 0.8, 1.0); - SortedMap> quantiless = mr.quantiles(qs); + SortedMap> quantiless = mr.estimatedQuantiles(qs); quantiless.forEach((ts, quantiles) -> { for (Double quantile : quantiles) { @@ -184,7 +184,7 @@ public void testQuantilesFunctionMapAggregator() throws Exception { fullResult.values().forEach(Collections::sort); List qs = Arrays.asList(0.0, 0.2, 0.4, 0.6, 0.8, 1.0); - SortedMap quantilesFunctions = mr.quantiles(); + SortedMap quantilesFunctions = mr.estimatedQuantiles(); 
quantilesFunctions.forEach((ts, quantilesFunction) -> { for (Double q : qs) {