Skip to content

Commit

Permalink
Implement mixed index aggregations
Browse files Browse the repository at this point in the history
Signed-off-by: To-om <thomas@strangebee.com>
  • Loading branch information
To-om authored and porunov committed Oct 21, 2022
1 parent e0fc21f commit 48e1d0a
Show file tree
Hide file tree
Showing 29 changed files with 776 additions and 155 deletions.
6 changes: 6 additions & 0 deletions docs/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,12 @@ JanusGraph now officially supports Java 11 in addition to Java 8. We encourage e

Preparation for TinkerPop 3.6.

##### Mixed index aggregation optimization

A new optimization has been added to compute aggregations (min, max, sum and avg) using mixed index engine (if the aggregation function follows an indexed query).
If the index backend is Elasticsearch, a `double` value is used to hold the result. As a result, aggregations on long numbers greater than 2^53 are approximate.
In this case, if the accurate result is essential, the optimization can be disabled by removing the strategy `JanusGraphMixedIndexAggStrategy`: `g.traversal().withoutStrategies(JanusGraphMixedIndexAggStrategy.class)`.

### Version 0.6.3 (Release Date: ???)

```xml tab='Maven'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
import java.util.Map;

import static org.janusgraph.diskstorage.Backend.METRICS_INDEX_PROVIDER_NAME;
import static org.janusgraph.diskstorage.util.MetricInstrumentedIndexProvider.M_MIXED_COUNT_QUERY;
import static org.janusgraph.diskstorage.util.MetricInstrumentedIndexProvider.M_MIXED_AGG_QUERY;
import static org.janusgraph.diskstorage.util.MetricInstrumentedIndexProvider.M_MUTATE;
import static org.janusgraph.diskstorage.util.MetricInstrumentedIndexProvider.M_QUERY;
import static org.janusgraph.diskstorage.util.MetricInstrumentedIndexProvider.M_RAW_QUERY;
Expand All @@ -54,21 +54,28 @@ public void open(WriteConfiguration config) {
public void testIndexMetrics() {
PropertyKey p1 = mgmt.makePropertyKey("p1").dataType(String.class).make();
mgmt.makePropertyKey("p2").dataType(String.class).make();
mgmt.buildIndex("idx", Vertex.class).addKey(p1, Mapping.STRING.asParameter()).buildMixedIndex("search");
PropertyKey p3 = mgmt.makePropertyKey("p3").dataType(Long.class).make();
mgmt.makePropertyKey("p4").dataType(Integer.class).make();
mgmt.buildIndex("idx", Vertex.class).addKey(p1, Mapping.STRING.asParameter()).addKey(p3).buildMixedIndex("search");
finishSchema();

graph.traversal().addV().property("p1", "value").iterate();
graph.traversal().addV().property("p1", "value").property("p3", 42).property("p4", 12).iterate();
graph.tx().commit();
graph.traversal().V().has("p1", "value").iterate();
graph.traversal().V().has("p2", "value").iterate();
graph.traversal().V().has("p1", "value").count().next();
graph.traversal().V().has("p1", "value").values("p3").max().next();
graph.traversal().V().has("p1", "value").values("p3").min().next();
graph.traversal().V().has("p1", "value").values("p3").sum().next();
graph.traversal().V().has("p1", "value").values("p3").mean().next();
graph.traversal().V().has("p1", "value").values("p4").max().next();
graph.indexQuery("idx", "p1:*").vertexTotals();
graph.indexQuery("idx", "p1:*").vertexStream();

Assertions.assertThrows(JanusGraphException.class, () ->
graph.indexQuery("idx", "!@#$%^").vertexTotals());

verifyIndexMetrics("search", METRICS_INDEX_PREFIX, ImmutableMap.of(M_MUTATE, 1L, M_QUERY, 1L, M_MIXED_COUNT_QUERY, 1L, M_TOTALS, 2L, M_RAW_QUERY, 1L));
verifyIndexMetrics("search", METRICS_INDEX_PREFIX, ImmutableMap.of(M_MUTATE, 1L, M_QUERY, 1L, M_MIXED_AGG_QUERY, 5L, M_TOTALS, 2L, M_RAW_QUERY, 1L));
assertEquals(1, metric.getCounter(METRICS_INDEX_PREFIX, "search", M_TOTALS, MetricInstrumentedStore.M_EXCEPTIONS).getCount());
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@
import org.janusgraph.graphdb.query.index.BruteForceIndexSelectionStrategy;
import org.janusgraph.graphdb.query.index.ThresholdBasedIndexSelectionStrategy;
import org.janusgraph.graphdb.query.profile.QueryProfiler;
import org.janusgraph.graphdb.tinkerpop.optimize.step.JanusGraphMixedIndexCountStep;
import org.janusgraph.graphdb.tinkerpop.optimize.step.JanusGraphMixedIndexAggStep;
import org.janusgraph.graphdb.tinkerpop.optimize.step.JanusGraphStep;
import org.janusgraph.graphdb.tinkerpop.optimize.strategy.JanusGraphMixedIndexCountStrategy;
import org.janusgraph.graphdb.transaction.StandardJanusGraphTx;
Expand Down Expand Up @@ -1115,21 +1115,32 @@ public void testGraphCentricQueryProfiling() {
final PropertyKey prop = makeKey("prop", String.class);
final PropertyKey description = makeKey("desc", String.class);
final PropertyKey pet = makeKey("pet", String.class);
mgmt.buildIndex("mixed", Vertex.class).addKey(name, Mapping.STRING.asParameter())
.addKey(prop, Mapping.STRING.asParameter()).buildMixedIndex(INDEX);
final PropertyKey height = makeKey("height", Integer.class);
final PropertyKey weight = makeKey("weight", Float.class);
mgmt.buildIndex("mixed", Vertex.class)
.addKey(name, Mapping.STRING.asParameter())
.addKey(prop, Mapping.STRING.asParameter())
.addKey(height)
.addKey(weight)
.buildMixedIndex(INDEX);
mgmt.buildIndex("mi", Vertex.class).addKey(description).addKey(pet).buildMixedIndex(INDEX2);
finishSchema();

tx.addVertex("name", "bob", "prop", "val", "desc", "he likes coding", "pet", "he likes dogs", "age", 20);
tx.addVertex("name", "bob", "prop", "val2", "desc", "he likes coding", "pet", "he likes cats", "age", 25);
tx.addVertex("name", "alex", "prop", "val", "desc", "he likes debugging", "pet", "he likes cats", "age", 20);
tx.addVertex("name", "bob", "prop", "val", "desc", "he likes coding", "pet", "he likes dogs", "age", 20, "height", 170, "weight", 72.3);
tx.addVertex("name", "bob", "prop", "val2", "desc", "he likes coding", "pet", "he likes cats", "age", 25, "height", 175, "weight", 102.5);
tx.addVertex("name", "alex", "prop", "val", "desc", "he likes debugging", "pet", "he likes cats", "age", 20, "height", 190, "weight", 55.3);
tx.commit();

// satisfied by a single graph-centric query which is satisfied by a single mixed index query
if (indexFeatures.supportNotQueryNormalForm()) {
newTx();
assertEquals(3, tx.traversal().V().or(__.has("name", "bob"), __.has("prop", "val")).count().next());
assertEquals(3, tx.traversal().V().or(__.has("name", "bob"), __.has("prop", "val")).toList().size());
assertEquals(190, tx.traversal().V().or(__.has("name", "bob"), __.has("prop", "val")).values("height").max().next());
assertEquals(25, tx.traversal().V().or(__.has("name", "bob"), __.has("prop", "val")).values("age").max().next());
assertEquals(170, tx.traversal().V().or(__.has("name", "bob"), __.has("prop", "val")).values("height").min().next());
assertEquals(535L, tx.traversal().V().or(__.has("name", "bob"), __.has("prop", "val")).values("height").sum().next());
assertEquals(535.0/3, tx.traversal().V().or(__.has("name", "bob"), __.has("prop", "val")).values("height").mean().next());
Metrics mMixedOr = tx.traversal().V().or(__.has("name", "bob"), __.has("prop", "val"))
.profile().next().getMetrics(0);
assertEquals("Or(JanusGraphStep([],[name.eq(bob)]),JanusGraphStep([],[prop.eq(val)]))", mMixedOr.getName());
Expand Down Expand Up @@ -1223,6 +1234,10 @@ public void testGraphCentricQueryProfiling() {
newTx();
assertEquals(3, tx.traversal().V().or(__.has("name", "bob"), __.has("age", 20)).count().next());
assertEquals(3, tx.traversal().V().or(__.has("name", "bob"), __.has("age", 20)).toList().size());
assertEquals(190, tx.traversal().V().or(__.has("name", "bob"), __.has("age", 20)).values("height").max().next());
assertEquals(170, tx.traversal().V().or(__.has("name", "bob"), __.has("age", 20)).values("height").min().next());
assertEquals(535L, tx.traversal().V().or(__.has("name", "bob"), __.has("age", 20)).values("height").sum().next());
assertEquals(535.0/3, tx.traversal().V().or(__.has("name", "bob"), __.has("age", 20)).values("height").mean().next());
Metrics mMixedOr = tx.traversal().V().or(__.has("name", "bob"), __.has("age", 20))
.profile().next().getMetrics(0);
assertEquals("Or(JanusGraphStep([],[name.eq(bob)]),JanusGraphStep([],[age.eq(20)]))", mMixedOr.getName());
Expand Down Expand Up @@ -1276,6 +1291,11 @@ public void testGraphCentricQueryProfiling() {
newTx();
assertEquals(1, tx.traversal().V().has("name", "bob").has("prop", "val").count().next());
assertEquals(1, tx.traversal().V().has("name", "bob").has("prop", "val").toList().size());
assertEquals(170, tx.traversal().V().has("name", "bob").has("prop", "val").values("height").max().next());
assertEquals(170, tx.traversal().V().has("name", "bob").has("prop", "val").values("height").min().next());
assertEquals(170L, tx.traversal().V().has("name", "bob").has("prop", "val").values("height").sum().next());
assertEquals(170.0, tx.traversal().V().has("name", "bob").has("prop", "val").values("height").mean().next());
assertEquals(174.8, (double)tx.traversal().V().has("name", "bob").values("weight").sum().next(), 0.001);
Metrics mMixedAnd = tx.traversal().V().has("name", "bob").has("prop", "val")
.profile().next().getMetrics(0);
assertEquals("JanusGraphStep([],[name.eq(bob), prop.eq(val)])", mMixedAnd.getName());
Expand Down Expand Up @@ -1303,6 +1323,10 @@ public void testGraphCentricQueryProfiling() {
newTx();
assertEquals(1, tx.traversal().V().has("name", "bob").has("prop", "val").count().next());
assertEquals(1, tx.traversal().V().has("name", "bob").has("prop", "val").toList().size());
assertEquals(170, tx.traversal().V().has("name", "bob").has("prop", "val").values("height").max().next());
assertEquals(170, tx.traversal().V().has("name", "bob").has("prop", "val").values("height").min().next());
assertEquals(170L, tx.traversal().V().has("name", "bob").has("prop", "val").values("height").sum().next());
assertEquals(170.0, tx.traversal().V().has("name", "bob").has("prop", "val").values("height").mean().next());
final Metrics mMixedAnd2 = tx.traversal().V().has("name", "bob").has("prop", "val")
.has("desc", Text.textContains("coding")).profile().next().getMetrics(0);
assertEquals("JanusGraphStep([],[name.eq(bob), prop.eq(val), desc.textContains(coding)])", mMixedAnd2.getName());
Expand Down Expand Up @@ -1633,7 +1657,7 @@ public void testIndexParameters() {
private void checkMixedIndexCountProfiling(TraversalMetrics profile, Map<String, String> annotations) {
Metrics metrics = profile.getMetrics(0);
assertTrue(metrics.getDuration(TimeUnit.MICROSECONDS) > 0);
assertTrue(metrics.getName().contains(JanusGraphMixedIndexCountStep.class.getSimpleName()));
assertTrue(metrics.getName().contains(JanusGraphMixedIndexAggStep.class.getSimpleName()));
Metrics nested = (Metrics) metrics.getNested().toArray()[0];
assertEquals(QueryProfiler.MIXED_INEX_COUNT_QUERY, nested.getName());
assertTrue(nested.getDuration(TimeUnit.MICROSECONDS) > 0);
Expand Down Expand Up @@ -1723,7 +1747,7 @@ public void testMixedIndexQueryFollowedByCount() {
assertEquals(total, graph.indexQuery(EINDEX, "e.name:(\"Uncle Berry has a farm\", \"ducks are beautiful animals\")").edgeTotals());
profile = graph.traversal().E().has("name", P.within("Uncle Berry has a farm", "ducks are beautiful animals"))
.count().profile().next();
assertTrue(profile.getMetrics(0).getName().contains(JanusGraphMixedIndexCountStep.class.getSimpleName()));
assertTrue(profile.getMetrics(0).getName().contains(JanusGraphMixedIndexAggStep.class.getSimpleName()));
assertEquals(total, graph.traversal().E().has("name", P.within("Uncle Berry has a farm", "ducks are beautiful animals")).count().next());


Expand All @@ -1747,7 +1771,7 @@ public void testMixedIndexQueryFollowedByCount() {
assertEquals(total, graph.indexQuery(EINDEX, "e.name:(\"Uncle Berry has a farm\", \"and on his farm he has five ducks\")").edgeTotals());
profile = graph.traversal().E().or(__.has("name", "Uncle Berry has a farm"), __.has("name", "and on his farm he has five ducks"))
.count().profile().next();
assertTrue(profile.getMetrics(0).getName().contains(JanusGraphMixedIndexCountStep.class.getSimpleName()));
assertTrue(profile.getMetrics(0).getName().contains(JanusGraphMixedIndexAggStep.class.getSimpleName()));
assertEquals(total, graph.traversal().E().or(__.has("name", "Uncle Berry has a farm"), __.has("name", "and on his farm he has five ducks"))
.count().next());

Expand Down Expand Up @@ -1813,7 +1837,7 @@ public void testMixedIndexQueryFollowedByCount() {

total = numV / strings.length;
profile = graph.traversal().V().has("name", "Uncle Berry has a farm").out().count().profile().next();
assertFalse(profile.getMetrics(0).getName().contains(JanusGraphMixedIndexCountStep.class.getSimpleName()));
assertFalse(profile.getMetrics(0).getName().contains(JanusGraphMixedIndexAggStep.class.getSimpleName()));
assertTrue(profile.getMetrics(0).getName().contains(JanusGraphStep.class.getSimpleName()));
assertEquals(total, graph.traversal().V().has("name", "Uncle Berry has a farm").out().count().next());

Expand All @@ -1830,11 +1854,11 @@ public void testMixedIndexQueryFollowedByCount() {
newTx();

profile = graph.traversal().V().has("age", 0).count().profile().next();
assertFalse(profile.getMetrics(0).getName().contains(JanusGraphMixedIndexCountStep.class.getSimpleName()));
assertFalse(profile.getMetrics(0).getName().contains(JanusGraphMixedIndexAggStep.class.getSimpleName()));
assertTrue(profile.getMetrics(0).getName().contains(JanusGraphStep.class.getSimpleName()));
assertEquals(5, graph.traversal().V().has("age", 0).count().next());
profile = graph.traversal().V().has("age").count().profile().next();
assertFalse(profile.getMetrics(0).getName().contains(JanusGraphMixedIndexCountStep.class.getSimpleName()));
assertFalse(profile.getMetrics(0).getName().contains(JanusGraphMixedIndexAggStep.class.getSimpleName()));
assertTrue(profile.getMetrics(0).getName().contains(JanusGraphStep.class.getSimpleName()));
assertEquals(10, graph.traversal().V().has("age").count().next());

Expand All @@ -1843,17 +1867,17 @@ public void testMixedIndexQueryFollowedByCount() {

assertEquals(10, graph.traversal().V().has("name").has("age").count().next());
profile = graph.traversal().V().has("name").has("age").count().profile().next();
assertFalse(profile.getMetrics(0).getName().contains(JanusGraphMixedIndexCountStep.class.getSimpleName()));
assertFalse(profile.getMetrics(0).getName().contains(JanusGraphMixedIndexAggStep.class.getSimpleName()));
assertEquals(5, graph.traversal().V().has("name").has("age", 0).count().next());
profile = graph.traversal().V().has("name").has("age", 0).count().profile().next();
assertFalse(profile.getMetrics(0).getName().contains(JanusGraphMixedIndexCountStep.class.getSimpleName()));
assertFalse(profile.getMetrics(0).getName().contains(JanusGraphMixedIndexAggStep.class.getSimpleName()));
profile = graph.traversal().V().has("age").has("name").count().profile().next();
assertFalse(profile.getMetrics(0).getName().contains(JanusGraphMixedIndexCountStep.class.getSimpleName()));
assertFalse(profile.getMetrics(0).getName().contains(JanusGraphMixedIndexAggStep.class.getSimpleName()));

/* ==========================
verify other special cases */

// cannot convert to JanusGraphMixedIndexCountStep if the first JanusGraphStep is not the start step
// cannot convert to JanusGraphMixedIndexAggStep if the first JanusGraphStep is not the start step
assertEquals((numV + 10) * numV, graph.traversal().V().V().has("text").count().next());
assertEquals(graph.traversal().withoutStrategies(JanusGraphMixedIndexCountStrategy.class).V().V().has("text").count().next(),
graph.traversal().V().V().has("text").count().next());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import org.janusgraph.diskstorage.indexing.KeyInformation;
import org.janusgraph.diskstorage.indexing.RawQuery;
import org.janusgraph.graphdb.query.JanusGraphPredicate;
import org.janusgraph.graphdb.tinkerpop.optimize.step.Aggregation;

import java.util.List;
import java.util.Map;
Expand Down Expand Up @@ -84,8 +85,8 @@ public Stream<String> query(IndexQuery query, KeyInformation.IndexRetriever info
}

@Override
public Long queryCount(IndexQuery query, KeyInformation.IndexRetriever information, BaseTransaction tx) throws BackendException {
return index.queryCount(query, information, tx);
public Number queryAggregation(IndexQuery query, KeyInformation.IndexRetriever information, BaseTransaction tx, Aggregation aggregation) throws BackendException {
return index.queryAggregation(query, information, tx, aggregation);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,18 @@
package org.janusgraph.core;

import org.janusgraph.graphdb.query.profile.ProfileObservable;
import org.janusgraph.graphdb.tinkerpop.optimize.step.Aggregation;

/**
* @author Boxuan Li (liboxuan@connect.hku.hk)
*/
public interface MixedIndexCountQuery extends ProfileObservable {
public interface MixedIndexAggQuery extends ProfileObservable {

/**
* Fire a count query against index backend to retrieve total number of satisfying elements
* Fire an aggregation query against index backend to retrieve aggregated result of satisfying elements
*
* @return total elements that match the query
* @param aggregation aggregation operation to perform on elements that match the query
* @return the result of the aggregation operation
*/
Long executeTotals();
Number execute(Aggregation aggregation);
}
Original file line number Diff line number Diff line change
Expand Up @@ -66,10 +66,10 @@ public interface Transaction extends Graph, SchemaManager {
JanusGraphQuery<? extends JanusGraphQuery> query();

/**
* @return a mixed index count query which leverages mixed index for counts
* @see StandardJanusGraphTx#mixedIndexCountQuery()
* @return a mixed index aggregation query which leverages mixed index for aggregation
* @see StandardJanusGraphTx#mixedIndexAggQuery()
*/
MixedIndexCountQuery mixedIndexCountQuery();
MixedIndexAggQuery mixedIndexAggQuery();

/**
* Returns a {@link org.janusgraph.core.JanusGraphIndexQuery} to query for vertices or edges against the specified indexing backend using
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
import org.janusgraph.diskstorage.util.BackendOperation;
import org.janusgraph.diskstorage.util.BufferUtil;
import org.janusgraph.graphdb.database.serialize.DataOutput;
import org.janusgraph.graphdb.tinkerpop.optimize.step.Aggregation;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

Expand Down Expand Up @@ -432,17 +433,17 @@ public String toString() {
});
}

public Long indexQueryCount(final String index, final IndexQuery query) {
public Number indexQueryAggregation(final String index, final IndexQuery query, final Aggregation aggregation) {
final IndexTransaction indexTx = getIndexTransaction(index);
return executeRead(new Callable<Long>() {
return executeRead(new Callable<Number>() {
@Override
public Long call() throws Exception {
return indexTx.queryCount(query);
public Number call() throws Exception {
return indexTx.queryAggregation(query, aggregation);
}

@Override
public String toString() {
return "indexQueryCount";
return "indexQueryAggregation";
}
});

Expand Down
Loading

0 comments on commit 48e1d0a

Please sign in to comment.