In [307]:
import networkx as nx

from regraph import Rule, plot_rule
from regraph.default.utils import keys_by_value, normalize_attrs
from regraph.neo4j.graphs import Neo4jGraph
from regraph.neo4j.cypher_utils import *

import profiling_utils

In [308]:
import importlib

In [309]:
importlib.reload(profiling_utils)

<module 'profiling_utils' from '/home/eugenia/ReGraph/examples/Profiling/profiling_utils.py'>

In [310]:
# initialize the neo4j driver, wrappped into Neo4jGraph object
g = Neo4jGraph(uri="bolt://localhost:7687", user="neo4j", password="admin")

In [311]:
# here we clear the db
res = g.clear()

In [312]:
nodes = [
    ("a", {"name": "EGFR", "state": "p"}),
    ("b", {"name": "BND"}),
    ("c", {"name": "Grb2", "aa": "S", "loc": 90}),
    ("d", {"name": "SH2"}),
    ("e", {"name": "EGFR"}),
    ("f", {"name": "BND"}),
    ("g", {"name": "Grb2"}),
    ("h", {"name": "WAF1"}),
    ("i", {"name": "BND"}),
    ("j", {"name": "G1-S/CDK", "state": "p"}),
    "k", "l", "m"
    ]
edges = [
    ("a", "b", {"s": "p"}),
    ("d", "b", {"s": "u"}),
    ("d", "c"),
    ("e", "f", {"s": "p"}),
    ("g", "f", {"s": "u"}),
    ("h", "i"),
    ("i", "h"),
    ("j", "h", {"act": {1}}),
    ("j", "i", {"act": {2}}),
    ("k", "l"),
    ("l", "k"),
    ("l", "m"),
    ("m", "l"),
    ("k", "m"),
    ("m", "k"),
    ("e", "b", {"s": "u"})
    ]
g.add_nodes_from(nodes)
g.add_edges_from(edges)

### Profiling clone query

In [313]:
# 

In [315]:
node = "a"
query = "PROFILE\n" +\
        match_node('x', node) +\
        cloning_query(
            original_var='x',
            clone_var='new_node',
            clone_id=node,
            clone_id_var='uid',
            ignore_naming=True)[0] +\
        return_vars(['uid'])
print(query)
res = g.execute(query).summary()

PROFILE
MATCH (x:node { id : 'a' })
WITH [] as sucIgnore, [] as predIgnore, x 
// match successors and out-edges of a node to be cloned
OPTIONAL MATCH (x)-[out_edge:edge]->(suc) 
WHERE NOT suc.id IS NULL AND NOT suc.id IN sucIgnore
WITH collect({neighbor: suc, edge: out_edge}) as suc_maps, predIgnore, x 
// match predecessors and in-edges of a node to be cloned
OPTIONAL MATCH (pred)-[in_edge:edge]->(x) 
WHERE NOT pred.id IS NULL AND NOT pred.id IN predIgnore
WITH collect({neighbor: pred, edge: in_edge}) as pred_maps, suc_maps, x 
// create a node corresponding to the clone
CREATE (new_node:node) 
WITH new_node, toString(id(new_node)) as uid, x.id as original_old, suc_maps, pred_maps, x 
// set the id property of the original node to NULL
SET x.id = NULL
// copy all the properties of the original node to the clone
SET new_node = x
// set id property of the clone to neo4j-generated id
SET new_node.id = toString(id(new_node)), new_node.count = NULL
// set back the id property of the origi

In [316]:
profiling_utils.total_db_hits(res.profile)

42

In [259]:
p.db_hits

0

In [260]:
i = p.index

In [261]:
print(i)

<built-in method index of ProfiledPlan object at 0x7f8886604b40>


In [262]:
print(res.result_available_after + res.result_consumed_after)

16


### Profiling merge queries (APOC)

In [263]:
n1 = "a"
n2 = "b"
name = "a_b"
node_list = [n1, n2]

query ="PROFILE\n" +\
        match_nodes({n: n for n in node_list}) + "\n" +\
        merging_query(
            original_vars=node_list,
            merged_var='merged_node',
            merged_id=name,
            merged_id_var='new_id',
            ignore_naming=True)[0] +\
        return_vars(['new_id'])
print(query)
res = g.execute(query).summary()

PROFILE
MATCH (a:node { id : 'a'}), (b:node { id : 'b'}) 
// use the APOC procedure 'apoc.refactor.mergeNodes' to merge nodes
CALL apoc.refactor.mergeNodes([a, b], {properties: 'combine'})
YIELD node as merged_node
// set appropriate node id
SET merged_node.id = toString(id(merged_node))
SET merged_node.count = NULL
WITH toString(id(merged_node)) as new_id, merged_node
// find and merge multiple relations resulting from the node merge
OPTIONAL MATCH (merged_node)-[out_rel:edge]->(suc)
WITH collect({neighbor: suc, edge: out_rel}) as suc_maps, merged_node, new_id
OPTIONAL MATCH (pred)-[in_rel:edge]->(merged_node)
WHERE pred.id <> merged_node.id
WITH collect({neighbor: pred, edge: in_rel}) as pred_maps, suc_maps, merged_node, new_id
WITH apoc.map.groupByMulti(suc_maps, 'neighbor') as suc_maps, apoc.map.groupByMulti(pred_maps, 'neighbor') as pred_maps, merged_node, new_id
WITH REDUCE(edges=[],  k in filter(k in keys(suc_maps) WHERE length(suc_maps[k]) > 1 ) | 
	edges + [suc_maps[k]]) + 
	R

In [264]:
profiling_utils.number_of_DbHits(res.profile)

['a']


8

In [265]:
print(res.result_available_after + res.result_consumed_after)

410


### Profiling merge queries

In [266]:
n1 = "c"
n2 = "g"
name = "c_g"
node_list = [n1, n2]

query ="PROFILE\n" +\
        match_nodes({n: n for n in node_list}) + "\n" +\
        merging_query1(
            original_vars=node_list,
            merged_var='merged_node',
            merged_id=name,
            merged_id_var='new_id',
            ignore_naming=True)[0] +\
        return_vars(['new_id'])
print(query)
res = g.execute(query).summary()

PROFILE
MATCH (c:node { id : 'c'}), (g:node { id : 'g'}) 
// accumulate all the attrs of the nodes to be merged
WITH [] as new_props, c, g
WITH new_props + REDUCE(pairs = [], k in keys(c) | 
	pairs + REDUCE(inner_pairs = [], v in c[k] | 
		inner_pairs + {key: k, value: v})) as new_props, c, g
WITH new_props + REDUCE(pairs = [], k in keys(g) | 
	pairs + REDUCE(inner_pairs = [], v in g[k] | 
		inner_pairs + {key: k, value: v})) as new_props, c, g
WITH apoc.map.groupByMulti(new_props, 'key') as new_props, c, g
WITH apoc.map.fromValues(REDUCE(pairs=[], k in keys(new_props) | 
	pairs + [k, REDUCE(values=[], v in new_props[k] | 
		values + CASE WHEN v.value IN values THEN [] ELSE v.value END)])) as new_props, c, g
SET c = new_props
WITH c as merged_node, g
// set appropriate node id
SET merged_node.id = toString(id(merged_node))
SET merged_node.count = NULL
WITH toString(id(merged_node)) as new_id, g, merged_node
// accumulate all the attrs of the edges incident to the merged nodes
WITH [] a

In [267]:
number_of_DbHits(res.profile)

NameError: name 'number_of_DbHits' is not defined

In [None]:
print(res.result_available_after + res.result_consumed_after)

#### Time to match a node with its id

In [None]:
node = "a"
query = match_node(node, node) + return_vars(node)
print(g.execution_time(query))

#### Time to add a node

In [None]:
node = "x"
attrs = {"act": {1}}
normalize_attrs(attrs)
query = create_node(node, node, 'new_id', attrs)[0]
print(g.execution_time(query))

#### Time to remove a node

In [None]:
node = "x"
query = match_node(node, node) + delete_nodes_var(node)
print(g.execution_time(query))

#### Time to add an edge

In [None]:
source = "a"
target = "c"
attrs = {"act": {1}}
normalize_attrs(attrs)
query = match_nodes({
            source: source,
            target: target
        })
query += create_edge(source, target, attrs)
print(g.execution_time(query))

#### Time to delete an edge

In [None]:
source = "a"
target = "c"
query = match_edge(source, target, source, target, 'r') 
query += delete_edge_var('r')
print(g.execution_time(query))

#### Time to clone a node

In [None]:
node = "a"
clone = "a_clone"
query =\
            match_node('x', node) +\
            cloning_query(
                original_var='x',
                clone_var='new_node',
                clone_id=clone,
                clone_id_var='uid')[0] +\
            return_vars(['uid'])
print(g.execution_time(query))

#### Cypher query for cloning (IgnoreNaming=True)

In [None]:
print("Here we ignore pretty naming of new clones\n")
clone2_res = g.clone_node('a', ignore_naming=True)
print("Created clone by the name: ", clone2_res)
print("Properties of the resulting node: ")
print(g.get_node(clone2_res))
print("Properties of incident edges are also cloned, e.g: ")
print(g.get_edge(clone2_res, 'b'))

#### Cypher query for cloning

In [None]:
clone1_res = g.clone_node('a')
print("Created clone by the name: ", clone1_res)
print("Properties of the resulting node: ")
print(g.get_node(clone1_res))
print("Properties of incident edges are also cloned, e.g: ")
print(g.get_edge(clone1_res, 'b'))