# `Memgraph` & `NetworkX` Report
Comparison of the `get_paths()` feature in `memgraph` vs `networkx`

In [3]:
import time

---  
## `Memgraph`  

In [1]:
import mgclient # connectto memgraph client


In [2]:
# connect to the memgraph server
conn = mgclient.connect(host="localhost", port=7687)   # connect to memgraph
conn.autocommit = True  # autocommit mode is required for DDL queries
cursor = conn.cursor() # cursor is used to execute queries

In [25]:
# Delete all nodes and relationships
# cursor.execute("MATCH (n) DETACH DELETE n")

In [6]:
# Methods for printing results & time

def get_time(start_time):
    # End timing
    end_time = time.time()

    # Calculate duration in seconds
    duration_seconds = end_time - start_time

    # Check if duration is 60 seconds or more
    if duration_seconds >= 60:
        # Convert to minutes and round to two decimal places
        duration_minutes = round(duration_seconds / 60, 2)
        print(f"Execution time: {duration_minutes} minutes")
    else:
        # Round to two decimal places for seconds
        rounded_seconds = round(duration_seconds, 2)
        print(f"Execution time: {rounded_seconds} seconds")

def print_memgraph_paths_results(results):
    for result in results:
        path = result[0]  # Get the path from the result

        # Print the full path
        full_path = " -> ".join(node.properties['name'] for node in path.nodes)
        print("Full Path:", full_path)

        # Iterate over the nodes to print predicates for each segment
        for i in range(len(path.nodes) - 1):
            start_node_name = path.nodes[i].properties['name']
            end_node_name = path.nodes[i + 1].properties['name']

            # Assuming the predicates are stored in the relationships in the same sequence
            predicates = path.relationships[i].properties.get('predicates', [])

            print(f"{start_node_name} - {end_node_name}: {predicates}")

        print("--------------------------------------------------\n")

View Index Info

In [None]:
# Index on the name property of the Entity node
q="""
CREATE INDEX ON :Entity(name);
"""

cursor.execute(q) # execute query

In [8]:
# Show index information for all indexes
cursor.execute("""SHOW INDEX INFO;""")
cursor.fetchall()

[('label+property', 'Entity', 'name', 113)]

In [6]:
# Show index information for the Entity node index
cursor.execute("""ANALYZE GRAPH;""")
cursor.fetchall()

[('Entity', 'name', 113, 113, 1.0, 0.0, 111.80530973451327)]

---
The Algorithm:
```
path_query = """
MATCH p=(start:Entity {name: 'InformationResource'})-[:RELATED_TO*..5]->(end:Entity {name: 'Publication'})
RETURN p;
"""
start_time= time.time()
cursor.execute(path_query)
mg_5h_results = cursor.fetchall()
print(f"Number of results(4-hop): {len(mg_5h_results)}")
get_time(start_time)
```

Get a set of 100 

In [5]:
start_time = time.time() # Start timing

path_query = """
MATCH p=(start:Entity {name: 'InformationResource'})-[:RELATED_TO*..3]->(end:Entity {name: 'Publication'})
RETURN p
LIMIT 100;
"""
cursor.execute(path_query)
results = cursor.fetchall()

print(f"Number of results: {len(results)}")
get_time(start_time)


Number of results: 100
Execution time: 0.64 seconds


3-hop

In [6]:
path_query = """
MATCH p=(start:Entity {name: 'InformationResource'})-[:RELATED_TO*..3]->(end:Entity {name: 'Publication'})
RETURN p;
"""

start_time= time.time()
cursor.execute(path_query)
mg_3h_results = cursor.fetchall()
print(f"Number of results(3-hop): {len(mg_3h_results)}")
get_time(start_time)

Number of results(3-hop): 531
Execution time: 1.15 seconds


4-hop

In [7]:
path_query = """
MATCH p=(start:Entity {name: 'InformationResource'})-[:RELATED_TO*..4]->(end:Entity {name: 'Publication'})
RETURN p;
"""
start_time= time.time()
cursor.execute(path_query)
mg_4h_results = cursor.fetchall()
print(f"Number of results(4-hop): {len(mg_4h_results)}")
get_time(start_time)

Number of results(4-hop): 37205
Execution time: 2.8 minutes


5-hop

In [16]:
path_query = """
MATCH p=(start:Entity {name: 'InformationResource'})-[:RELATED_TO*..5]->(end:Entity {name: 'Publication'})
RETURN p
LIMIT 100;
"""
start_time= time.time()
cursor.execute(path_query)
mg_5h_results = cursor.fetchall()
print(f"Number of results(5-hop): {len(mg_5h_results)}")
get_time(start_time)


Number of results(5-hop): 100
Execution time: 0.77 seconds


### `Memgraph` with `algo` 

The above algorithm finds all paths, we need to find simple paths.

In [15]:
query="""
MATCH (start:Entity {name: 'InformationResource'}) 
MATCH (end:Entity {name: 'Publication'})
CALL algo.all_simple_paths(start, end, ['>'], 3) 
YIELD path AS result RETURN result;
"""
start_time= time.time()
cursor.execute(query)
algo_3h_results = cursor.fetchall()
print(f"Number of results(3-hop): {len(algo_3h_results)}")
get_time(start_time)

Number of results(3-hop): 448
Execution time: 1.13 seconds


In [7]:
algo_3h_results[0]

(<mgclient.Path(nodes=[<mgclient.Node(id=264, labels={'Entity'}, properties={'name': 'InformationResource'}) at 0x11b00a250>, <mgclient.Node(id=206, labels={'Entity'}, properties={'name': 'Behavior'}) at 0x11b07f450>, <mgclient.Node(id=177, labels={'Entity'}, properties={'name': 'MolecularEntity'}) at 0x11b07e160>, <mgclient.Node(id=159, labels={'Entity'}, properties={'name': 'Publication'}) at 0x11b105e60>], relationships=[<mgclient.Relationship(start_id=206, end_id=264, type='RELATED_TO', properties={'api': [[{'bte': {'query_operation': {'input_separator': ',', 'method': 'post', 'params': None, 'path': '/query', 'path_params': None, 'request_body': None, 'server': 'https://kg2.transltr.io/api/rtxkg2/v1.3', 'support_batch': True}}, 'name': 'RTX KG2 - TRAPI 1.3.0', 'smartapi': {'id': 'ccd4a8bb83de81401e9a27f1d8e7f948', 'metadata': 'https://raw.githubusercontent.com/RTXteam/RTX/production/code/UI/OpenAPI/python-flask-server/KG2/openapi_server/openapi/openapi.yaml', 'ui': 'https://smart-

#### Filtering with relations(predicates)

In [None]:
query="""
MATCH (start:Entity {name: 'InformationResource'}), (end:Entity {name: 'Publication'})
CALL algo.all_simple_paths(start, end, ['>'], 3)
YIELD path
RETURN path;
"""
start_time= time.time()
cursor.execute(query)
algo_3h_results_f = cursor.fetchall()
print(f"Number of results(3-hop): {len(algo_3h_results_f)}")
get_time(start_time)


Number of results(3-hop): 448
Execution time: 0.73 seconds


In [10]:
query="""
MATCH (start:Entity {name: 'InformationResource'}), (end:Entity {name: 'Publication'})
CALL algo.all_simple_paths(start, end, ['>'], 4)
YIELD path
RETURN path;
"""
start_time= time.time()
cursor.execute(query)
algo_4h_results_f = cursor.fetchall()
print(f"Number of results(4-hop): {len(algo_4h_results_f)}")
get_time(start_time)


Number of results(4-hop): 30776
Execution time: 12.01 minutes


In [None]:
query="""
MATCH (start:Entity {name: 'InformationResource'}), (end:Entity {name: 'Publication'})
CALL algo.all_simple_paths(start, end, ['>'], 5)
YIELD path
RETURN path
LIMIT 100;
"""
start_time= time.time()
cursor.execute(query)
algo_5h_results_f = cursor.fetchall()
print(f"Number of results(5-hop): {len(algo_5h_results_f)}")
get_time(start_time)


---

## NetworkX

In [4]:
from utils.metakg.path_finder import MetaKGPathFinder
import networkx.classes.digraph as nx_digraph
import networkx as nx


In [9]:
subject='InformationResource'
object='Publication'

start_time=time.time()
query_data = {
    'q': '*'
}
pf = MetaKGPathFinder(query_data)

G = pf.G
cutoff=3
nx_3h_results = pf.get_paths(subject=subject, object=object, cutoff=cutoff)
print(f"Number of results(3-hop): {len(nx_3h_results)}")
get_time(start_time)

Number of results(3-hop): 448
Execution time: 9.61 seconds


In [11]:
for result in nx_3h_results:
    print(result)

{'path': ['InformationResource', 'NamedThing', 'AnatomicalEntity', 'Publication'], 'edges': [{'subject': 'InformationResource', 'object': 'NamedThing', 'predicate': 'associated_with', 'api': [{'name': 'RTX KG2 - TRAPI 1.3.0', 'smartapi': {'id': 'ccd4a8bb83de81401e9a27f1d8e7f948'}}, {'name': 'ARAX Translator Reasoner - TRAPI 1.3.0', 'smartapi': {'id': 'e248aefca0f469229e82cca80fbabc89'}}]}, {'subject': 'InformationResource', 'object': 'NamedThing', 'predicate': 'subclass_of', 'api': [{'name': 'RTX KG2 - TRAPI 1.3.0', 'smartapi': {'id': 'ccd4a8bb83de81401e9a27f1d8e7f948'}}, {'name': 'ARAX Translator Reasoner - TRAPI 1.3.0', 'smartapi': {'id': 'e248aefca0f469229e82cca80fbabc89'}}]}, {'subject': 'NamedThing', 'object': 'AnatomicalEntity', 'predicate': 'occurs_together_in_literature_with', 'api': [{'name': 'Text Mined Cooccurrence API', 'smartapi': {'id': '71fa2e0f0f1fe1ec67f4ddb719db5ef3'}}, {'name': 'Text Mined Cooccurrence API (TRAPI 1.4.0)', 'smartapi': {'id': '2d7efe7d2dabb473c5856e7da

In [10]:
subject='InformationResource'
object='Publication'

start_time=time.time()
query_data = {
    'q': '*'
}
pf = MetaKGPathFinder(query_data)

start_time=time.time()
G = pf.G
cutoff=4
nx_4h_results = pf.get_paths(subject=subject, object=object, cutoff=cutoff)
print(f"Number of results(4-hop): {len(nx_4h_results)}")
get_time(start_time)

Number of results(4-hop): 30776
Execution time: 23.41 seconds


In [38]:
subject='InformationResource'
object='Publication'

start_time=time.time()
query_data = {
    'q': '*'
}
pf = MetaKGPathFinder(query_data)


start_time=time.time()
G = pf.G
cutoff=5
nx_5h_results = pf.get_paths(subject=subject, object=object, cutoff=cutoff)
print(f"Number of results(5-hop): {len(nx_5h_results)}")
get_time(start_time)



---