Skip to content

Commit

Permalink
generate algebraic expression from cyclic graph (RedisGraph#736)
Browse files Browse the repository at this point in the history
* generate algebraic expression from cyclic graph

* improved longest path starting point candidate nomination
  • Loading branch information
swilly22 committed Nov 13, 2019
1 parent eb8d94e commit cbae11e
Show file tree
Hide file tree
Showing 11 changed files with 154 additions and 53 deletions.
1 change: 1 addition & 0 deletions src/algorithms/algorithms.h
Expand Up @@ -11,5 +11,6 @@
#include "./dfs.h"
#include "./all_paths.h"
#include "./detect_cycle.h"
#include "./longest_path.h"

#endif
6 changes: 3 additions & 3 deletions src/algorithms/detect_cycle.c
Expand Up @@ -13,7 +13,7 @@
bool IsAcyclicGraph(const QueryGraph *qg) {
assert(qg);

bool cycle = false; // Return value.
bool acyclic = true;

// Give an ID for each node, abuse of `labelID`.
uint node_count = QueryGraph_NodeCount(qg);
Expand Down Expand Up @@ -51,7 +51,7 @@ bool IsAcyclicGraph(const QueryGraph *qg) {
res = GrB_Matrix_nvals(&nvals, t);
assert(res == GrB_SUCCESS);
if(nvals != 0) {
cycle = true;
acyclic = false;
break;
}
}
Expand All @@ -61,6 +61,6 @@ bool IsAcyclicGraph(const QueryGraph *qg) {
GrB_free(&c);
GrB_free(&t);

return cycle;
return acyclic;
}

19 changes: 8 additions & 11 deletions src/algorithms/dfs.c
Expand Up @@ -9,7 +9,7 @@
#include "rax.h"
#include "../graph/entities/qg_edge.h"

bool _DFS(QGNode *n, int level, int current_level, rax *visited, QGEdge ***path) {
bool _DFS(QGNode *n, int level, bool close_cycle, int current_level, rax *visited, QGEdge ***path) {
// As long as we've yet to reach required level and there are nodes to process.
if(current_level >= level) return true;

Expand All @@ -21,25 +21,22 @@ bool _DFS(QGNode *n, int level, int current_level, rax *visited, QGEdge ***path)

// Expand node N by visiting all of its neighbors
bool not_seen;
bool self_pointing_edge;
for(uint i = 0; i < array_len(n->outgoing_edges); i++) {
QGEdge *e = n->outgoing_edges[i];
self_pointing_edge = (strcmp(e->dest->alias, n->alias) == 0);
not_seen = raxFind(visited, (unsigned char *)e->dest->alias, strlen(e->dest->alias)) == raxNotFound;
if(self_pointing_edge || not_seen) {
if(not_seen || close_cycle) {
*path = array_append(*path, e);
if(_DFS(e->dest, level, ++current_level, visited, path)) return true;
if(_DFS(e->dest, level, close_cycle, current_level + 1, visited, path)) return true;
array_pop(*path);
}
}

for(uint i = 0; i < array_len(n->incoming_edges); i++) {
QGEdge *e = n->incoming_edges[i];
self_pointing_edge = (strcmp(e->src->alias, n->alias) == 0);
not_seen = raxFind(visited, (unsigned char *)e->src->alias, strlen(e->src->alias)) == raxNotFound;
if(self_pointing_edge || not_seen) {
if(not_seen || close_cycle) {
*path = array_append(*path, e);
if(_DFS(e->src, level, ++current_level, visited, path)) return true;
if(_DFS(e->src, level, close_cycle, current_level + 1, visited, path)) return true;
array_pop(*path);
}
}
Expand All @@ -49,12 +46,12 @@ bool _DFS(QGNode *n, int level, int current_level, rax *visited, QGEdge ***path)
}

// Returns a single path from S to a reachable node at distance level.
QGEdge **DFS(QGNode *s, int level) {
QGEdge **DFS(QGNode *s, int level, bool close_cycle) {
int current_level = 0; // Tracks BFS level.
rax *visited = raxNew(); // Dictionary of visited nodes.
QGEdge **path = array_new(QGEdge *, 0); // Path found.
QGEdge **path = array_new(QGEdge *, 0); // Path found.

_DFS(s, level, current_level, visited, &path);
_DFS(s, level, close_cycle, current_level, visited, &path);
raxFree(visited);
return path;
}
5 changes: 3 additions & 2 deletions src/algorithms/dfs.h
Expand Up @@ -14,6 +14,7 @@
/* Perform DFS scan from node S,
* Returns a single path from S to a reachable node at distance level. */
QGEdge **DFS(
QGNode *s, // Node from which DFS scan begins.
int level // Stop scanning once reached level.
QGNode *s, // Node from which DFS scan begins.
int level, // Stop scanning once reached level.
bool close_cycle // Allow DFS scan to close a cycle.
);
91 changes: 91 additions & 0 deletions src/algorithms/longest_path.c
@@ -0,0 +1,91 @@
/*
* Copyright 2018-2019 Redis Labs Ltd. and Contributors
*
* This file is available under the Redis Labs Source Available License Agreement
*/

#include "./longest_path.h"
#include "./bfs.h"
#include "./dfs.h"
#include "../util/arr.h"
#include"./detect_cycle.h"

/* Recursive helper of _DFSMaxDepth.
 * Walks the graph depth first starting at `n`, following both the outgoing
 * and the incoming edges of every node, and keeps `*max_depth` equal to the
 * greatest `level` seen so far.
 *
 * n          node currently being expanded.
 * level      number of edges walked from the scan's origin to reach `n`.
 * max_depth  in/out, greatest level reached so far.
 * visited    aliases of the nodes residing on the path currently explored.
 *
 * The depth is recorded before the visited check, so stepping onto a node
 * which is already on the current path still counts as one more level.
 * NOTE(review): this also holds for walking an edge straight back to the node
 * it was just entered from - confirm this is the intended behaviour. */
static void __DFSMaxDepth(QGNode *n, int level, int *max_depth, rax *visited) {
	if(*max_depth < level) *max_depth = level;

	unsigned char *key = (unsigned char *)n->alias;
	size_t key_len = strlen(n->alias);

	// A failed insert is treated as "n is already on the current path": stop expanding.
	int inserted = raxInsert(visited, key, key_len, NULL, NULL);
	if(!inserted) return;

	int next_level = level + 1;

	// Follow outgoing edges towards their destination nodes.
	for(uint out_idx = 0; out_idx < array_len(n->outgoing_edges); out_idx++) {
		QGEdge *out_edge = n->outgoing_edges[out_idx];
		__DFSMaxDepth(out_edge->dest, next_level, max_depth, visited);
	}

	// Follow incoming edges back to their source nodes.
	for(uint in_idx = 0; in_idx < array_len(n->incoming_edges); in_idx++) {
		QGEdge *in_edge = n->incoming_edges[in_idx];
		__DFSMaxDepth(in_edge->src, next_level, max_depth, visited);
	}

	// Done with n, unmark it so other paths are free to pass through it.
	raxRemove(visited, key, key_len, NULL);
}

/* Computes the greatest depth a DFS scan starting at node `n` reaches.
 * The scan starts at level 0 with an empty visited set, the set is released
 * before returning. */
static int _DFSMaxDepth(QGNode *n) {
	int deepest = 0;        // Greatest level reached by the scan.
	rax *seen = raxNew();   // Nodes residing on the path currently explored.

	// `n` itself sits at level 0.
	__DFSMaxDepth(n, 0, &deepest, seen);
	raxFree(seen);

	return deepest;
}

/* Picks the node from which the longest path is expanded in a graph
 * containing a cycle, and computes the length of that path.
 *
 * g      graph to scan, must contain at least one node.
 * level  output, set to the depth reached by a DFS scan from the returned node.
 *
 * Returns the first node of `g` with an in-degree or an out-degree of 0,
 * when no such node exists the first node of `g` is returned. */
QGNode *LongestPathGraph(const QueryGraph *g, int *level) {
	/* To find the longest path in a graph containing a cycle
	 * where we do not expand from a visited node, either:
	 * 1. the entire graph is a cycle, in which case it doesn't matter
	 * which node we pick to begin our traversal.
	 * 2. there's a node with an in-degree or an out-degree of 0, such a node
	 * resides on the "edge" of the graph, from which the longest path
	 * begins/ends. */
	assert(g && level);

	uint node_count = QueryGraph_NodeCount(g);
	// g->nodes[0] is read below, an empty graph has no starting point.
	assert(node_count > 0);

	// Default pick, used when every node has both incoming and outgoing edges.
	QGNode *start = g->nodes[0];

	for(uint i = 0; i < node_count; i++) {
		QGNode *candidate = g->nodes[i];
		if(QGNode_IncomeDegree(candidate) == 0 || QGNode_OutgoingDegree(candidate) == 0) {
			// First node residing on the "edge" of the graph wins.
			start = candidate;
			break;
		}
	}

	*level = _DFSMaxDepth(start);
	return start;
}

/* Picks the node from which the longest path is expanded in an acyclic graph,
 * and computes the length of that path.
 *
 * Two BFS scans are performed: the first starts at the first node of `g` and
 * is only used to pick a node from the list it returns, the second starts at
 * that node; the level reported by the second scan is the path's length.
 *
 * g      graph to scan, must contain at least one node.
 * level  output, set to the level reported by the second BFS scan.
 *
 * Returns the first node of the list returned by the second BFS scan. */
QGNode *LongestPathTree(const QueryGraph *g, int *level) {
	assert(g && level);
	// g->nodes[0] is read below, an empty graph has no starting point.
	assert(QueryGraph_NodeCount(g) > 0);

	// First scan: starting at the graph's first node.
	int depth = BFS_LOWEST_LEVEL;
	QGNode **deepest = BFS(g->nodes[0], &depth);
	// Make sure there's a node to pick before indexing into the result.
	assert(array_len(deepest) > 0);
	QGNode *far_end = deepest[0];
	array_free(deepest);

	// Second scan: starting at the node picked by the first scan.
	depth = BFS_LOWEST_LEVEL;
	deepest = BFS(far_end, &depth);
	assert(array_len(deepest) > 0 && depth >= 0);
	QGNode *n = deepest[0];
	array_free(deepest);

	*level = depth;
	return n;
}
15 changes: 15 additions & 0 deletions src/algorithms/longest_path.h
@@ -0,0 +1,15 @@
/*
* Copyright 2018-2019 Redis Labs Ltd. and Contributors
*
* This file is available under the Redis Labs Source Available License Agreement
*/

#pragma once

#include "../graph/query_graph.h"
#include "../graph/entities/qg_node.h"

/* Determines the longest path within a tree structured (acyclic) graph.
 * g      graph to scan.
 * level  output, set to the length of the longest path.
 * Returns the node from which that path is expanded. */
QGNode *LongestPathTree(const QueryGraph *g, int *level);
/* Determines the longest path within a graph containing cycles.
 * g      graph to scan.
 * level  output, set to the length of the longest path.
 * Returns the node from which that path is expanded. */
QGNode *LongestPathGraph(const QueryGraph *g, int *level);
38 changes: 18 additions & 20 deletions src/arithmetic/algebraic_expression.c
Expand Up @@ -230,21 +230,6 @@ static void _RemovePathFromGraph(QueryGraph *g, QGEdge **path) {
}
}

/* Determine the length of the longest path in the graph.
* Returns a list residing on the edge of the longest path. */
static QGNode **_DeepestLevel(const QueryGraph *g, int *level) {
int l = BFS_LOWEST_LEVEL;
QGNode **leafs = BFS(g->nodes[0], &l);
QGNode *leaf = leafs[0];
array_free(leafs);

l = BFS_LOWEST_LEVEL;
leafs = BFS(leaf, &l);

*level = l;
return leafs;
}

/* If the edge is referenced or of a variable length, it should populate the AlgebraicExpression. */
static inline bool _should_populate_edge(QGEdge *e) {
return (_referred_entity(e->alias) || QGEdge_VariableLength(e));
Expand Down Expand Up @@ -524,20 +509,30 @@ AlgebraicExpression **AlgebraicExpression_FromQueryGraph(const QueryGraph *qg, u
return NULL;
}

bool acyclic = IsAcyclicGraph(qg);
QueryGraph *g = QueryGraph_Clone(qg);
AlgebraicExpression **exps = array_new(AlgebraicExpression *, 1);

// As long as there are nodes to process.
while(QueryGraph_NodeCount(g) > 0) {
// Get leaf nodes at the deepest level.
int level;
QGNode **leafs = _DeepestLevel(g, &level);
assert(array_len(leafs) > 0 && level >= 0);
QGNode *n;
if(acyclic) n = LongestPathTree(g, &level); // Graph is a tree.
else n = LongestPathGraph(g, &level); // Graph contains cycles.

// Get a path of length level.
QGEdge **path = DFS(leafs[0], level);
// Get a path of length level, allow closing a cycle if the graph is not acyclic.
QGEdge **path = DFS(n, level, !acyclic);
assert(array_len(path) == level);

/* TODO:
* In case path is a cycle, e.g. (b)-[]->(a)-[]->(b),
* make sure the first node on the path is referenced, i.e. _should_divide_expression(path, 0) is true.
* If this is not the case we will unnecessarily break the generated expression into 2 sub expressions,
* while what we can do is simply rotate the cycle: (a)-[]->(b)-[]->(a),
* this is exactly the same cycle, only now we won't subdivide.
* Checking whether path is a cycle is done by comparing its start and end nodes. */

// Construct expression.
AlgebraicExpression *exp = _AlgebraicExpression_FromPath(path, level);

Expand All @@ -554,12 +549,15 @@ AlgebraicExpression **AlgebraicExpression_FromQueryGraph(const QueryGraph *qg, u

// Clean up
array_free(path);
array_free(leafs);
array_free(sub_exps);
free(exp->operands);
free(exp);
// TODO memory leak (fails on [a|b] relations?)
// AlgebraicExpression_Free(exp);

/* If original graph contained a cycle
* see now after we've removed a path if this is still the case. */
if(!acyclic) acyclic = IsAcyclicGraph(g);
}

// TODO just return exps?
Expand Down
1 change: 0 additions & 1 deletion src/module.c
Expand Up @@ -161,4 +161,3 @@ int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc)

return REDISMODULE_OK;
}

21 changes: 10 additions & 11 deletions tests/unit/test_algebraic_expression.cpp
Expand Up @@ -1044,6 +1044,10 @@ TEST_F(AlgebraicExpressionTest, ShareableEntity) {
free(actual);

// Cycle.
/* TODO: The algebraic expression here can be improved,
* reducing it from 2 expressions into a single one:
* mat_p * mat_ef * mat_p * mat_ef * mat_p
* see the comment in AlgebraicExpression_FromQueryGraph regarding cycles. */
exp_count = 0;
q = "MATCH (a:Person)-[:friend]->(b:Person)-[:friend]->(a:Person) RETURN a";
actual = build_algebraic_expression(q, &exp_count);
Expand All @@ -1057,14 +1061,13 @@ TEST_F(AlgebraicExpressionTest, ShareableEntity) {
expected[0] = exp;

exp = AlgebraicExpression_Empty();
AlgebraicExpression_AppendTerm(exp, mat_p, false, false, true);
AlgebraicExpression_AppendTerm(exp, mat_ef, false, false, false);
AlgebraicExpression_AppendTerm(exp, mat_p, false, false, true);
expected[1] = exp;

compare_algebraic_expressions(actual, expected, 2);

// Clean up.
// Clean up.
free_algebraic_expressions(actual, exp_count);
free_algebraic_expressions(expected, exp_count);
free(expected);
Expand All @@ -1074,24 +1077,20 @@ TEST_F(AlgebraicExpressionTest, ShareableEntity) {
exp_count = 0;
q = "MATCH (a:Person)-[:friend]->(b:Person)-[:friend]->(c:Person)-[:friend]->(a:Person) RETURN a";
actual = build_algebraic_expression(q, &exp_count);
ASSERT_EQ(exp_count, 2);
ASSERT_EQ(exp_count, 1);

expected = (AlgebraicExpression **)malloc(sizeof(AlgebraicExpression *) * 2);
exp = AlgebraicExpression_Empty();
AlgebraicExpression_AppendTerm(exp, mat_p, false, false, true);
AlgebraicExpression_AppendTerm(exp, mat_ef, false, false, false);
AlgebraicExpression_AppendTerm(exp, mat_p, false, false, true);
AlgebraicExpression_AppendTerm(exp, mat_ef, false, false, false);
AlgebraicExpression_AppendTerm(exp, mat_p, false, false, true);
expected[0] = exp;

exp = AlgebraicExpression_Empty();
AlgebraicExpression_AppendTerm(exp, mat_p, false, false, true);
AlgebraicExpression_AppendTerm(exp, mat_p, false, false, true);
AlgebraicExpression_AppendTerm(exp, mat_ef, false, false, false);
AlgebraicExpression_AppendTerm(exp, mat_p, false, false, true);
expected[1] = exp;

compare_algebraic_expressions(actual, expected, 2);
expected[0] = exp;
compare_algebraic_expressions(actual, expected, 1);

// Clean up.
free_algebraic_expressions(actual, exp_count);
Expand Down
4 changes: 2 additions & 2 deletions tests/unit/test_detect_cycle.cpp
Expand Up @@ -115,14 +115,14 @@ class DetectCycleTest: public ::testing::Test {

TEST_F(DetectCycleTest, AcyclicGraph) {
QueryGraph *g = AcyclicBuildGraph(); // Graph traversed.
ASSERT_FALSE(IsAcyclicGraph(g));
ASSERT_TRUE(IsAcyclicGraph(g));
// Clean up.
QueryGraph_Free(g);
}

TEST_F(DetectCycleTest, CyclicGraph) {
QueryGraph *g = CyclicBuildGraph(); // Graph traversed.
ASSERT_TRUE(IsAcyclicGraph(g));
ASSERT_FALSE(IsAcyclicGraph(g));
// Clean up.
QueryGraph_Free(g);
}
6 changes: 3 additions & 3 deletions tests/unit/test_dfs.cpp
Expand Up @@ -71,8 +71,8 @@ class DFSTest: public ::testing::Test {
};

TEST_F(DFSTest, DFSLevels) {
QGNode *S; // DFS starts here.
QGEdge **path; // Path reached by DFS.
QGNode *S; // DFS starts here.
QGEdge **path; // Path reached by DFS.
QueryGraph *g; // Graph traversed.

g = BuildGraph();
Expand All @@ -97,7 +97,7 @@ TEST_F(DFSTest, DFSLevels) {
//------------------------------------------------------------------------------

for(int level = 0; level < 5; level++) {
path = DFS(S, level);
path = DFS(S, level, true);
QGEdge **expectation = expected[level];

int edge_count = array_len(path);
Expand Down

0 comments on commit cbae11e

Please sign in to comment.