In [1]:
import requests
import json

In [2]:
import pprint
# Shared pretty-printer used by later cells to display result dictionaries;
# indent=5 is the number of spaces added per nesting level.
pp = pprint.PrettyPrinter(indent=5)

You can also bypass TRAPI entirely and just use cypher to talk to the graph.  An intro to using the Cypher Query Language can be found here: https://neo4j.com/developer/cypher/guide-cypher-basics/

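There are two instances for accessing the graph using cypher.  The first is at http://robokopkg.renci.org, which has a cypher browser on it; you can also write cypher and send it there directly over Bolt.  The second is the automat interface at https://automat.renci.org/robokopkg/cypher, which accepts cypher posted as JSON.  Both are demonstrated below.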

Additionally, cypher queries can be generated by clicking the `Copy` button from the ExEmPLAR tool after setting up a query pattern: https://www.exemplar.mml.unc.edu/

The simplest example query below asks "Find me a Gene that is related to both `PUBCHEM.COMPOUND:644073` (Buprenorphine) and `HP:0001337` (Tremor)".

We encountered issues with using the notation "RETURN \*", mainly because it does not specify what to return.  Queries to `automat` return edge properties but not direction or predicates, whereas queries to `robokopkg.renci.org` return direction and predicates but not edge properties.  We therefore replaced the "\*" with an explicit format that returns node pairs together with specific relationship information, including the type and properties.

In [20]:
#cypher = f'MATCH (a:`biolink:Gene`) RETURN a LIMIT 1'
#cypher = f"MATCH (n0_0:`biolink:ChemicalEntity`)-[r0_0]-(n1_0:`biolink:Gene`)-[r1_0]-(n2_0:`biolink:DiseaseOrPhenotypicFeature`) WHERE n0_0.name IN ['Buprenorphine'] AND n2_0.name IN ['Tremor'] RETURN * LIMIT 100"
#cypher = f"MATCH (n0_0:`biolink:ChemicalEntity`)-[r0_0]-(n1_0:`biolink:Gene`)-[r1_0]-(n2_0:`biolink:DiseaseOrPhenotypicFeature`) WHERE n0_0.name IN ['Buprenorphine'] AND n2_0.name IN ['Tremor'] RETURN a.id as identifier LIMIT 100"

# Simplified query for single entity nodes
# cypher = "MATCH (n0_0:`biolink:ChemicalEntity` {name:'Buprenorphine'})-[r0_0]-(n1_0:`biolink:Gene`)-[r1_0]-(n2_0:`biolink:DiseaseOrPhenotypicFeature` {name:'Tremor'}) RETURN * LIMIT 100"

# Buprenorphine -> [Gene] -> Tremor
# cypher = f"MATCH (n0_0:`biolink:ChemicalEntity`)-[r0_0]-(n1_0:`biolink:Gene`)-[r1_0]-(n2_0:`biolink:DiseaseOrPhenotypicFeature`) WHERE n0_0.name IN ['Buprenorphine'] AND n2_0.name IN ['Tremor'] RETURN n0_0 as chemical_1, type(r0_0) as edge_1_type, r0_0 as edge_1  LIMIT 100"
# cypher = f"MATCH (n0_0:`biolink:ChemicalEntity`)-[r0_0]-(n1_0:`biolink:Gene`)-[r1_0]-(n2_0:`biolink:DiseaseOrPhenotypicFeature`) WHERE n0_0.name IN ['Buprenorphine'] AND n2_0.name IN ['Tremor'] RETURN n0_0 as chemical_1, r0_0 as edge_1, type(r0_0) as edge_1_type, properties(r0_0) as edge_1_properties  LIMIT 100"
# cypher = f"MATCH (n0_0:`biolink:ChemicalEntity`)-[r0_0]-(n1_0:`biolink:Gene`)-[r1_0]-(n2_0:`biolink:DiseaseOrPhenotypicFeature`) WHERE n0_0.name IN ['Buprenorphine'] AND n2_0.name IN ['Tremor'] RETURN n0_0 as chemical_1, n1_0 as gene_1, n2_0 as phenotype_1, type(r0_0) as edge_1_type, r0_0 as edge_1, r1_0 as edge_2  LIMIT 100"
# cypher = f"MATCH (n0_0:`biolink:ChemicalEntity`)-[r0_0]-(n1_0:`biolink:Gene`)-[r1_0]-(n2_0:`biolink:DiseaseOrPhenotypicFeature`) WHERE n0_0.name IN ['Buprenorphine'] AND n2_0.name IN ['Tremor'] RETURN n0_0 as chemical_1, r0_0 as edge_1, type(r0_0) as edge_1_type, properties(r0_0) as edge_1_properties, [startNode(r0_0),endNode(r0_0)] as edge_1_node_pair LIMIT 100"
# cypher = f"MATCH (n0_0:`biolink:ChemicalEntity`)-[r0_0]-(n1_0:`biolink:Gene`)-[r1_0]-(n2_0:`biolink:DiseaseOrPhenotypicFeature`) WHERE n0_0.name IN ['Buprenorphine'] AND n2_0.name IN ['Tremor'] RETURN [startNode(r0_0),[type(r0_0),properties(r0_0)],endNode(r0_0)] as edge_1, [startNode(r1_0),[type(r1_0),properties(r1_0)],endNode(r1_0)] as edge_2 LIMIT 100"
# Active query: Buprenorphine -[edge_1]- Gene -[edge_2]- Tremor.
# Each edge column is returned as [start node, [type, properties], end node],
# and node_names carries the three node names of the matched path.
# The literal is split by clause for readability; the joined text is unchanged.
cypher = (
    "MATCH (n0_0:`biolink:ChemicalEntity`)-[r0_0]-(n1_0:`biolink:Gene`)"
    "-[r1_0]-(n2_0:`biolink:DiseaseOrPhenotypicFeature`)"
    " WHERE n0_0.name IN ['Buprenorphine'] AND n2_0.name IN ['Tremor']"
    " RETURN [startNode(r0_0),[type(r0_0),properties(r0_0)],endNode(r0_0)] as edge_1,"
    " [startNode(r1_0),[type(r1_0),properties(r1_0)],endNode(r1_0)] as edge_2,"
    " [n0_0.name, n1_0.name, n2_0.name] as node_names"
    " LIMIT 100"
)

# Asterixis
# cypher = "MATCH (n0_0:`biolink:ChemicalEntity`)-[r0_0]-(n1_0:`biolink:Gene`)-[r1_0]-(n2_0:`biolink:DiseaseOrPhenotypicFeature`) WHERE n0_0.name IN ['Buprenorphine'] AND n2_0.name IN ['Asterixis'] RETURN [startNode(r0_0),[type(r0_0),properties(r0_0)],endNode(r0_0)] as edge_1, [startNode(r1_0),[type(r1_0),properties(r1_0)],endNode(r1_0)] as edge_2 LIMIT 100"

## Cypher - robokopkg.renci.org

The first instance for accessing the graph using cypher is reached here over the Bolt protocol.  After defining the `Neo4jConnection` class, the query is sent to `bolt://robokopkg.renci.org:7687`.

In [21]:
from neo4j import GraphDatabase
class Neo4jConnection:
    """Small wrapper around a Neo4j driver for running one-off Cypher queries.

    Errors while creating the driver or running a query are printed rather
    than raised; a failed query yields ``None``.
    """

    def __init__(self, uri, user, pwd):
        """Store the connection settings and try to create the driver.

        Args:
            uri: Connection URI handed to ``GraphDatabase.driver``.
            user: User name for the auth tuple.
            pwd: Password for the auth tuple.
        """
        self.__uri, self.__user, self.__pwd = uri, user, pwd
        self.__driver = None
        try:
            credentials = (self.__user, self.__pwd)
            self.__driver = GraphDatabase.driver(self.__uri, auth=credentials)
        except Exception as err:
            # Driver stays None; query() asserts on that later.
            print("Failed to create the driver:", err)

    def close(self):
        """Close the underlying driver if one was created."""
        if self.__driver is None:
            return
        self.__driver.close()

    def query(self, query, db=None):
        """Run ``query`` in a fresh session and return its records as a list.

        Args:
            query: Cypher text passed to ``session.run``.
            db: Optional database name; when ``None`` the session is opened
                without a ``database`` argument.

        Returns:
            A list built from the result of ``session.run``, or ``None`` if
            opening the session or running the query raised an exception.

        Raises:
            AssertionError: If the driver was never created.
        """
        assert self.__driver is not None, "Driver not initialized!"
        # Only forward `database` when the caller actually supplied one.
        session_kwargs = {} if db is None else {"database": db}
        active_session = None
        records = None
        try:
            active_session = self.__driver.session(**session_kwargs)
            records = list(active_session.run(query))
        except Exception as err:
            print("Query failed:", err)
        finally:
            # Always release the session, even when the query failed.
            if active_session is not None:
                active_session.close()
        return records

In [22]:
# Open the Bolt connection to robokopkg with user 'neo4j' and an empty password.
# NOTE(review): presumably the public instance allows read access without a
# real password — confirm before pointing this at any other server.
pw = ''
conn = Neo4jConnection(uri="bolt://robokopkg.renci.org:7687", user = 'neo4j', pwd = pw)

In [23]:
# Run the Cypher query over Bolt. Neo4jConnection.query returns a list of
# records, or None when the query failed (the error is printed, not raised),
# so a None here means the cells below cannot iterate over the result.
record_list = conn.query(cypher)

Data in records are returned with columns labeled for nodes and relationships/edges.

Node records contain all information about the object associated with a node, e.g. for Buprenorphine, contains label, SMILES, whether it's a receptor agonist or antagonist, etc. This is all in dictionary form.

Each edge column holds a list of length 3.  The first and third items are the start and end nodes of the relationship, respectively.  These two items have the same content as what appears in the node records.  The second item is itself a two-element list: a string denoting the type of relationship, e.g. `biolink:affects`, followed by a dictionary of the edge's properties.

In [27]:
from datetime import datetime
from pathlib import Path

# Results from the robokopkg (Bolt) run go into a per-run folder named after
# the current timestamp, e.g. output/Cypher_robokopkg/<YYYY-MM-DD_HHMMSS>.
now = datetime.now()
dt_string = now.strftime("%Y-%m-%d_%H%M%S")
write_dir = Path("output/Cypher_robokopkg") / dt_string
# Create any missing parent folders; re-running within the same second is fine.
write_dir.mkdir(parents=True, exist_ok=True)

In [35]:
# Print every edge of every returned path as "start -> predicate -> end" and
# collect the distinct (edge text, edge properties) combinations.
#
# Fixes relative to the previous version of this cell:
#   * the print call had a stray `")||{data[1][1]}")` after the closed f-string,
#     which made the whole cell a SyntaxError;
#   * string_out_list was appended to without being defined in this cell or
#     any cell above it, so a fresh kernel raised NameError.
string_out_list = []

for record in record_list:
    record_data = record.data()
    # Only the first two columns (edge_1, edge_2) describe edges; the remaining
    # column (node_names) is a plain list of names and is skipped here.
    edge_columns = list(record_data.values())[:2]
    for data in edge_columns:
        # data is [start node, [relationship type, properties], end node].
        edge_text = f"{data[0]['name']} -> {data[1][0]} -> {data[2]['name']}"
        print(edge_text)
        # Properties are appended after '||' so that edges with the same
        # endpoints and type but different properties stay distinct.
        string_out_test = f"{edge_text}||{data[1][1]}"
        if string_out_test not in string_out_list:
            string_out_list.append(string_out_test)

Buprenorphine -> biolink:affects -> CYP2D6
CYP2D6 -> biolink:genetic_association -> Tremor
CYP2D6 -> biolink:affects -> Buprenorphine
CYP2D6 -> biolink:genetic_association -> Tremor
Buprenorphine -> biolink:regulates -> CYP2D6
CYP2D6 -> biolink:genetic_association -> Tremor
Buprenorphine -> biolink:directly_physically_interacts_with -> CYP2D6
CYP2D6 -> biolink:genetic_association -> Tremor
Buprenorphine -> biolink:affects -> CYP2D6
CYP2D6 -> biolink:genetic_association -> Tremor
CYP2D6 -> biolink:affects -> Buprenorphine
CYP2D6 -> biolink:genetic_association -> Tremor
Buprenorphine -> biolink:regulates -> CYP2D6
CYP2D6 -> biolink:genetic_association -> Tremor
Buprenorphine -> biolink:directly_physically_interacts_with -> CYP2D6
CYP2D6 -> biolink:genetic_association -> Tremor


In [37]:
import os
from collections import Counter

# Summarise the robokopkg (Bolt) results: count, for each
# "start -> predicate -> end" edge, how many distinct property sets were seen,
# then write the summary to a JSON text file in write_dir.
#
# Fix: the output file name used to be built from `results_json`, which is only
# created further down in the automat section, so this cell failed with
# NameError on a fresh top-to-bottom run. It now uses this section's own
# `record_list`.

# A dict is used as an insertion-ordered set: first-seen order is kept and
# membership checks are O(1) instead of scanning a list for every edge.
unique_edges = {}
for record in record_list:
    record_data = record.data()
    # First two columns are edge_1 and edge_2: [start node, [type, props], end node].
    for data in list(record_data.values())[:2]:
        edge_key = f"{data[0]['name']} -> {data[1][0]} -> {data[2]['name']}||{data[1][1]}"
        unique_edges.setdefault(edge_key, None)

# File name comes from the node names of the first returned path,
# e.g. Buprenorphine_CYP2D6_Tremor (spaces replaced by underscores).
combined_node_list = "_".join(record_list[0].data()['node_names']).replace(" ", "_")
print(combined_node_list)

# Drop the '||<properties>' suffix; edges that differed only by properties now
# collapse to the same text and are counted together.
string_out_list = [key.split('||', 1)[0] for key in unique_edges]

string_out_dict = dict(Counter(string_out_list))
pp.pprint(string_out_dict)

with open(os.path.join(write_dir, combined_node_list + ".txt"), 'w') as convert_file:
    convert_file.write(json.dumps(string_out_dict))

Buprenorphine_CYP2D6_Tremor
{    'Buprenorphine -> biolink:affects -> CYP2D6': 1,
     'Buprenorphine -> biolink:directly_physically_interacts_with -> CYP2D6': 1,
     'Buprenorphine -> biolink:regulates -> CYP2D6': 1,
     'CYP2D6 -> biolink:affects -> Buprenorphine': 1,
     'CYP2D6 -> biolink:genetic_association -> Tremor': 2}


## Cypher - automat.renci.org

The second way of accessing the graph using cypher is to post the query to the automat interface (https://automat.renci.org/robokopkg/cypher).  The same cypher query from above is used, wrapped in a JSON payload.

In [4]:
# Query sent to the automat endpoint: Buprenorphine -[edge_1]- Gene -[edge_2]- Tremor.
# Each edge column is returned as [start node, [type, properties], end node],
# and node_names carries the three node names of the matched path.
# The literal is split by clause for readability; the joined text is unchanged.
cypher = (
    "MATCH (n0_0:`biolink:ChemicalEntity`)-[r0_0]-(n1_0:`biolink:Gene`)"
    "-[r1_0]-(n2_0:`biolink:DiseaseOrPhenotypicFeature`)"
    " WHERE n0_0.name IN ['Buprenorphine'] AND n2_0.name IN ['Tremor']"
    " RETURN [startNode(r0_0),[type(r0_0),properties(r0_0)],endNode(r0_0)] as edge_1,"
    " [startNode(r1_0),[type(r1_0),properties(r1_0)],endNode(r1_0)] as edge_2,"
    " [n0_0.name, n1_0.name, n2_0.name] as node_names"
    " LIMIT 100"
)

In [11]:
# The automat endpoint takes the Cypher text in a JSON body: {"query": <cypher>}.
j = {'query': cypher}

# Without a timeout requests waits indefinitely if the server stops responding;
# 120 seconds is a generous ceiling for a LIMIT 100 query.
results = requests.post('https://automat.renci.org/robokopkg/cypher', json=j, timeout=120)

# Fail here with a clear HTTP error instead of a JSON decode error or KeyError
# further down when the server answers with a non-2xx status.
results.raise_for_status()

results_json = results.json()
# Column names are the aliases from the RETURN clause of the query.
column_names = results_json['results'][0]['columns']
print(column_names)
print(results.status_code)

['edge_1', 'edge_2', 'node_names']
200


In [12]:
from datetime import datetime
from pathlib import Path

# Results from the automat run go into a per-run folder named after the
# current timestamp, e.g. output/Cypher_automat/<YYYY-MM-DD_HHMMSS>.
now = datetime.now()
dt_string = now.strftime("%Y-%m-%d_%H%M%S")
write_dir = Path("output/Cypher_automat") / dt_string
# Create any missing parent folders; re-running within the same second is fine.
write_dir.mkdir(parents=True, exist_ok=True)

In [13]:
import os
from collections import Counter

# Summarise the automat results: count, for each "start -> predicate -> end"
# edge, how many distinct property sets were seen, then write the summary to a
# JSON text file in write_dir.
#
# Changes relative to the previous version of this cell:
#   * the loop counter `j` is gone — it overwrote the request payload `j`
#     ({'query': cypher}) with an integer, and neither `i` nor `j` was read;
#   * de-duplication uses a dict as an insertion-ordered set instead of a
#     list scan per edge (same first-seen order, O(1) membership).
unique_edges = {}
for result in results_json['results'][0]['data']:
    # row[0] and row[1] are edge_1 and edge_2: [start node, [type, props], end node].
    for item in result['row'][0:2]:
        edge_key = f"{item[0]['name']} -> {item[1][0]} -> {item[2]['name']}||{item[1][1]}"
        unique_edges.setdefault(edge_key, None)

# File name comes from the node names (third column) of the first returned
# path, e.g. Buprenorphine_CYP2D6_Tremor (spaces replaced by underscores).
combined_node_list = "_".join(results_json['results'][0]['data'][0]['row'][2]).replace(" ", "_")
print(combined_node_list)

# Drop the '||<properties>' suffix; edges that differed only by properties now
# collapse to the same text and are counted together.
string_out_list = [key.split('||', 1)[0] for key in unique_edges]

string_out_dict = dict(Counter(string_out_list))
pp.pprint(string_out_dict)

with open(os.path.join(write_dir, combined_node_list + ".txt"), 'w') as convert_file:
    convert_file.write(json.dumps(string_out_dict))

Buprenorphine_CYP2D6_Tremor
{    'Buprenorphine -> biolink:affects -> CYP2D6': 1,
     'Buprenorphine -> biolink:directly_physically_interacts_with -> CYP2D6': 1,
     'CYP2D6 -> biolink:affects -> Buprenorphine': 1,
     'CYP2D6 -> biolink:genetic_association -> Tremor': 2}
