In [1]:
import mgclient # memgraph client


```
docker run -p 7687:7687 -p 7444:7444 -p 3000:3000 --name memgraph -e MEMGRAPH="--query-execution-timeout-sec=3600 --memory-limit=30720" memgraph/memgraph-platform
```

# Data Comparisons 

## `Memgraph`

**Connect to `memgraph` host**

In [44]:
# Open a connection to the Memgraph instance listening on localhost:7687.
conn = mgclient.connect(host="localhost", port=7687)
# Autocommit mode is required for DDL queries (e.g. the CREATE INDEX run later in this notebook).
conn.autocommit = True
# Every query in the cells below is executed through this cursor.
cursor = conn.cursor()

---

In [48]:
from controller.metakg import MetaKG
from model import ConsolidatedMetaKGDoc

In [49]:
# Index name declared on ConsolidatedMetaKGDoc; passed to MetaKG.get_all_via_scan when loading edges.
index = ConsolidatedMetaKGDoc.Index.name

In [51]:
# Create a label-property index on :Entity(name).
# Every MERGE and MATCH in this notebook looks Entity nodes up by their `name` property.
q="""
CREATE INDEX ON :Entity(name);
"""

cursor.execute(q) # run the DDL statement (relies on autocommit being enabled on the connection)

In [52]:
# Cypher statement that upserts one MetaKG edge into Memgraph:
#   - MERGE both endpoint nodes by name (created only if missing),
#   - MERGE the relationship keyed on a per-triple id,
#   - then (re)write its predicate and api properties.
# Built once, outside the loop, because the text never changes; only the
# parameters differ from edge to edge.
query = """
    MERGE (s:Entity {name: $subject})
    MERGE (o:Entity {name: $object})
    MERGE (s)-[r:RELATED_TO {id: $unique_id}]->(o)
    SET r.predicate = $predicate, r.api = $api_data
    """

# Stream every document of the consolidated MetaKG index and write it to the graph.
for edge in MetaKG.get_all_via_scan(size=1000, index=index):
    source = edge['_source']
    subject = source['subject']
    # Deliberately not named `object`: in a notebook that would shadow the
    # builtin `object` for every cell executed afterwards.
    object_name = source['object']
    predicate = source['predicate']
    api_data = source['api']  # stored unchanged as a property on the relationship

    # One relationship per distinct (subject, predicate, object) triple.
    # This is just an example key - adapt it if the data structure changes.
    unique_id = f"{subject}-{predicate}-{object_name}"

    params = {
        'subject': subject,
        'object': object_name,
        'unique_id': unique_id,
        'predicate': predicate,
        'api_data': api_data,
    }
    cursor.execute(query, params)



---

In [4]:
# Paths of at most one RELATED_TO hop, in either direction, between
# the InformationResource and Publication entities.
query="""
MATCH p=(start:Entity {name: 'InformationResource'})-[:RELATED_TO*..1]-(end:Entity {name: 'Publication'})
RETURN p;
"""

In [5]:
# Run the 1-hop query and report how many paths it matched.
cursor.execute(query)
results = cursor.fetchall()
print(len(results))


1


In [6]:
# Same pattern widened to at most two RELATED_TO hops; still undirected.
query="""
MATCH p=(start:Entity {name: 'InformationResource'})-[:RELATED_TO*..2]-(end:Entity {name: 'Publication'})
RETURN p;
"""

In [7]:
# Run the 2-hop query and report how many paths it matched.
cursor.execute(query)
results = cursor.fetchall()
print(len(results))


593


In [8]:
# Directed paths of at most two hops, reduced to the predicates found along them.
# NOTE(review): the WITH clause aggregates by (start, end), not by path, so the
# predicates of every matched path are pooled into a single list per node pair.
# Group by `p` instead if one row per path is wanted - confirm the intent.
# NOTE(review): executing this query in the next cell ended in a transaction timeout.
query="""
MATCH p=(start:Entity {name: 'InformationResource'})-[:RELATED_TO*..2]->(end:Entity {name: 'Publication'})
UNWIND relationships(p) AS r
WITH start, end, collect(r.predicate) AS predicates
RETURN start, end, predicates;
"""

In [None]:
# Run the predicate-collecting query and report how many rows came back.
cursor.execute(query)
results = cursor.fetchall()
print(len(results))


DatabaseError: Transaction was asked to abort because of transaction timeout.

`PROFILE MATCH` query 

In [5]:
# PROFILE variant of the directed, at-most-three-hop path search, capped at 100 paths.
# The result rows describe the operators of the execution plan rather than the paths themselves.
q="""
PROFILE MATCH p=(start:Entity {name: 'InformationResource'})-[:RELATED_TO*..3]->(end:Entity {name: 'Publication'})
RETURN p
LIMIT 100;
"""

In [8]:
# Execute the PROFILE query and collect the rows of the plan report.
cursor.execute(q)
results = cursor.fetchall()

# One line per plan operator.
for plan_row in results:
    print(plan_row)

('* Limit', 101, '  0.003494 %', '  0.020170 ms')
('* Produce {p}', 100, '  0.037991 %', '  0.219289 ms')
('* ConstructNamedPath', 100, '  0.027923 %', '  0.161174 ms')
('* Filter (start :Entity), {start.name}', 100, ' 67.900171 %', ' 391.930952 ms')
('* ExpandVariable (end)<-[anon1:RELATED_TO]-(start)', 1548889, ' 32.029290 %', ' 184.878330 ms')
('* ScanAllByLabelPropertyValue (end :Entity {name})', 1, '  0.001127 %', '  0.006506 ms')
('* Once', 1, '  0.000003 %', '  0.000019 ms')


The Cypher query profiled above performs the following operations against the Memgraph graph database:
```
MATCH p=(start:Entity {name: 'InformationResource'})-[:RELATED_TO*..3]->(end:Entity {name: 'Publication'})
RETURN p
LIMIT 100;
```
1. **MATCH Clause**: 
   - `MATCH p=(start:Entity {name: 'InformationResource'})-[:RELATED_TO*..3]->(end:Entity {name: 'Publication'})`
   - This part of the query is looking for a specific pattern in the graph. It defines a variable `p` that represents a path between two nodes (labeled as `Entity`). 
   - The first node (`start`) is an `Entity` with a `name` property equal to `'InformationResource'`.
   - The last node (`end`) is an `Entity` with a `name` property equal to `'Publication'`.
   - The `[:RELATED_TO*..3]->` segment specifies the type of relationships to follow (`RELATED_TO`) and the maximum depth of the path. It indicates that the query should find paths where `start` and `end` are connected by a sequence of 1 to 3 `RELATED_TO` relationships. The `->` indicates that the relationships are traversed in the direction from `start` to `end`.

2. **RETURN Clause**:
   - `RETURN p`
   - This clause specifies that the query should return the paths that it finds. Each path `p` includes the start node, the end node, and all nodes and relationships between them that match the specified pattern.

3. **LIMIT Clause**:
   - `LIMIT 100`
   - This part of the query limits the number of paths returned to 100. Without this limit, the query could potentially return a very large number of paths, especially in a large or densely connected graph, which might be more data than is needed and could impact performance.

In summary, the query searches for at most 100 paths in the graph in which two nodes labeled `Entity`, identified by their `name` properties, are connected by a chain of 1 to 3 `RELATED_TO` relationships. It is particularly useful for finding indirect relationships (connections that are not immediately obvious) between two entities.

In [10]:
# Same search as the profiled query, without PROFILE: fetch up to 100 directed
# paths of at most three RELATED_TO hops from InformationResource to Publication.
path_query="""
MATCH p=(start:Entity {name: 'InformationResource'})-[:RELATED_TO*..3]->(end:Entity {name: 'Publication'})
RETURN p
LIMIT 100;
"""

cursor.execute(path_query)
results = cursor.fetchall()

# Dump each raw result row (a tuple holding one mgclient.Path) on its own line.
for row in results:
    print(row)

(<mgclient.Path(nodes=[<mgclient.Node(id=226, labels={'Entity'}, properties={'name': 'InformationResource'}) at 0x10cab9590>, <mgclient.Node(id=121, labels={'Entity'}, properties={'name': 'Publication'}) at 0x10cabb390>, <mgclient.Node(id=114, labels={'Entity'}, properties={'name': 'SmallMolecule'}) at 0x10c940ed0>, <mgclient.Node(id=121, labels={'Entity'}, properties={'name': 'Publication'}) at 0x10c942a30>], relationships=[<mgclient.Relationship(start_id=226, end_id=121, type='RELATED_TO', properties={'api': [{'bte': {'query_operation': {'input_separator': ',', 'method': 'post', 'params': None, 'path': '/query', 'path_params': None, 'request_body': None, 'server': 'https://kg2.transltr.io/api/rtxkg2/v1.3', 'support_batch': True}}, 'name': 'RTX KG2 - TRAPI 1.3.0', 'smartapi': {'id': 'ccd4a8bb83de81401e9a27f1d8e7f948', 'metadata': 'https://raw.githubusercontent.com/RTXteam/RTX/production/code/UI/OpenAPI/python-flask-server/KG2/openapi_server/openapi/openapi.yaml', 'ui': 'https://smart-

In [11]:
# Returns one clean row per path: the ordered node names and the predicates of its relationships.
# Very slow at >= 3 hops; an alternative is to return the raw paths and build the clean rows
# in Python as a post-processing step.
# NOTE(review): this cell only defines `q`; it does not execute it.
q="""
MATCH p=(start:Entity {name: 'InformationResource'})-[:RELATED_TO*..3]->(end:Entity {name: 'Publication'})
UNWIND relationships(p) AS r
WITH p, collect(r.predicate) AS predicates
RETURN reduce(names = [], n IN nodes(p) | names + n.name) AS nodeNames, predicates
LIMIT 10;
"""

In [38]:
# Fetch up to 100 raw paths and flatten each one in Python into a readable
# "node - predicate -> node ..." line.
q="""
MATCH p=(start:Entity {name: 'InformationResource'})-[:RELATED_TO*..3]->(end:Entity {name: 'Publication'})
RETURN p
LIMIT 100;
"""

cursor.execute(q)
results = cursor.fetchall()

for row in results:
    path = row[0]  # each row is expected to carry the path in its first column
    hops = path.relationships

    # Walk the nodes in order; every node except the last is followed by the
    # predicate of the relationship that leaves it.
    for position, node in enumerate(path.nodes):
        print(node.properties['name'], end='')
        if position < len(hops):
            print(' - ' + hops[position].properties['predicate'] + ' -> ', end='')

    # Terminate the line, then leave one blank line between paths.
    print()
    print()


InformationResource - subclass_of -> Publication - correlated_with -> SmallMolecule - correlated_with -> Publication

InformationResource - subclass_of -> InformationContentEntity - correlated_with -> SmallMolecule - correlated_with -> Publication

InformationResource - related_to -> InformationContentEntity - correlated_with -> SmallMolecule - correlated_with -> Publication

InformationResource - associated_with -> PhenotypicFeature - ameliorates -> SmallMolecule - correlated_with -> Publication

InformationResource - associated_with -> DiseaseOrPhenotypicFeature - related_to -> SmallMolecule - correlated_with -> Publication

InformationResource - associated_with -> DiseaseOrPhenotypicFeature - is_side_effect_of -> SmallMolecule - correlated_with -> Publication

InformationResource - associated_with -> PhenotypicFeature - treated_by -> SmallMolecule - correlated_with -> Publication

InformationResource - associated_with -> Disease - is_side_effect_of -> SmallMolecule - correlated_with

In [53]:
# Count every directed path of at most three RELATED_TO hops between the two
# entities (no LIMIT), to see how many paths the capped queries leave out.
q="""
MATCH p=(start:Entity {name: 'InformationResource'})-[:RELATED_TO*..3]->(end:Entity {name: 'Publication'})
RETURN count(p) AS total;
"""

cursor.execute(q)
results = cursor.fetchall()
print(results)

[(346364,)]


---

In [None]:
from utils.metakg.path_finder import MetaKGPathFinder
