In [1]:
import requests
import json

In [2]:
import pprint
# Pretty-printer configured with a 5-space indent; in the visible cells it is only
# referenced by a commented-out `pp.pprint(base_dict)` call.
pp = pprint.PrettyPrinter(indent=5)

You can also bypass TRAPI entirely and just use cypher to talk to the graph.  An intro to using the Cypher Query Language can be found here: https://neo4j.com/developer/cypher/guide-cypher-basics/

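There are two ways to access the graph using Cypher.  The first is http://robokopkg.renci.org, which hosts a Cypher browser and also accepts queries sent over the Bolt protocol.  The second is the Automat interface at https://automat.renci.org/robokopkg/cypher, which accepts a Cypher query posted as JSON.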

Additionally, cypher queries can be generated by clicking the `Copy` button from the ExEmPLAR tool after setting up a query pattern: https://www.exemplar.mml.unc.edu/

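The simplest example query below asks "Find me a Gene that is related to both `PUBCHEM.COMPOUND:644073` (Buprenorphine) and `HP:0001337` (Tremor)".  The queries match these nodes by name rather than by identifier, and the currently active query substitutes Asterixis for Tremor.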

We encountered issues with the notation "RETURN \*", mainly because it does not specify what to return.  With "RETURN \*", queries to `automat` return edge properties but not edge direction or predicates, whereas queries to `robokopkg.renci.org` return direction and predicates but not edge properties.  We therefore replaced the "\*" with an explicit return format that yields, for each edge, the node pair together with the relationship type and properties.

In [18]:
#cypher = f'MATCH (a:`biolink:Gene`) RETURN a LIMIT 1'
#cypher = f"MATCH (n0_0:`biolink:ChemicalEntity`)-[r0_0]-(n1_0:`biolink:Gene`)-[r1_0]-(n2_0:`biolink:DiseaseOrPhenotypicFeature`) WHERE n0_0.name IN ['Buprenorphine'] AND n2_0.name IN ['Tremor'] RETURN * LIMIT 100"
#cypher = f"MATCH (n0_0:`biolink:ChemicalEntity`)-[r0_0]-(n1_0:`biolink:Gene`)-[r1_0]-(n2_0:`biolink:DiseaseOrPhenotypicFeature`) WHERE n0_0.name IN ['Buprenorphine'] AND n2_0.name IN ['Tremor'] RETURN a.id as identifier LIMIT 100"

# Simplified query for single entity nodes
# cypher = "MATCH (n0_0:`biolink:ChemicalEntity` {name:'Buprenorphine'})-[r0_0]-(n1_0:`biolink:Gene`)-[r1_0]-(n2_0:`biolink:DiseaseOrPhenotypicFeature` {name:'Tremor'}) RETURN * LIMIT 100"

# Buprenorphine -> [Gene] -> Tremor
# cypher = f"MATCH (n0_0:`biolink:ChemicalEntity`)-[r0_0]-(n1_0:`biolink:Gene`)-[r1_0]-(n2_0:`biolink:DiseaseOrPhenotypicFeature`) WHERE n0_0.name IN ['Buprenorphine'] AND n2_0.name IN ['Tremor'] RETURN n0_0 as chemical_1, type(r0_0) as edge_1_type, r0_0 as edge_1  LIMIT 100"
# cypher = f"MATCH (n0_0:`biolink:ChemicalEntity`)-[r0_0]-(n1_0:`biolink:Gene`)-[r1_0]-(n2_0:`biolink:DiseaseOrPhenotypicFeature`) WHERE n0_0.name IN ['Buprenorphine'] AND n2_0.name IN ['Tremor'] RETURN n0_0 as chemical_1, r0_0 as edge_1, type(r0_0) as edge_1_type, properties(r0_0) as edge_1_properties  LIMIT 100"
# cypher = f"MATCH (n0_0:`biolink:ChemicalEntity`)-[r0_0]-(n1_0:`biolink:Gene`)-[r1_0]-(n2_0:`biolink:DiseaseOrPhenotypicFeature`) WHERE n0_0.name IN ['Buprenorphine'] AND n2_0.name IN ['Tremor'] RETURN n0_0 as chemical_1, n1_0 as gene_1, n2_0 as phenotype_1, type(r0_0) as edge_1_type, r0_0 as edge_1, r1_0 as edge_2  LIMIT 100"
# cypher = f"MATCH (n0_0:`biolink:ChemicalEntity`)-[r0_0]-(n1_0:`biolink:Gene`)-[r1_0]-(n2_0:`biolink:DiseaseOrPhenotypicFeature`) WHERE n0_0.name IN ['Buprenorphine'] AND n2_0.name IN ['Tremor'] RETURN n0_0 as chemical_1, r0_0 as edge_1, type(r0_0) as edge_1_type, properties(r0_0) as edge_1_properties, [startNode(r0_0),endNode(r0_0)] as edge_1_node_pair LIMIT 100"
# cypher = f"MATCH (n0_0:`biolink:ChemicalEntity`)-[r0_0]-(n1_0:`biolink:Gene`)-[r1_0]-(n2_0:`biolink:DiseaseOrPhenotypicFeature`) WHERE n0_0.name IN ['Buprenorphine'] AND n2_0.name IN ['Tremor'] RETURN [startNode(r0_0),[type(r0_0),properties(r0_0)],endNode(r0_0)] as edge_1, [startNode(r1_0),[type(r1_0),properties(r1_0)],endNode(r1_0)] as edge_2 LIMIT 100"

# Asterixis
# Path: Buprenorphine -[r0_0]- Gene -[r1_0]- Asterixis.  Each returned column packs one
# edge as [start node, [relationship type, relationship properties], end node].
# The adjacent literals below are concatenated into a single query string.
cypher = (
    "MATCH (n0_0:`biolink:ChemicalEntity`)-[r0_0]-(n1_0:`biolink:Gene`)"
    "-[r1_0]-(n2_0:`biolink:DiseaseOrPhenotypicFeature`)"
    " WHERE n0_0.name IN ['Buprenorphine'] AND n2_0.name IN ['Asterixis']"
    " RETURN [startNode(r0_0),[type(r0_0),properties(r0_0)],endNode(r0_0)] as edge_1,"
    " [startNode(r1_0),[type(r1_0),properties(r1_0)],endNode(r1_0)] as edge_2"
    " LIMIT 100"
)

The first way to access the graph with Cypher uses the Bolt protocol.  After defining the `Neo4jConnection` class, the query is sent to `bolt://robokopkg.renci.org:7687`.

In [19]:
from neo4j import GraphDatabase
class Neo4jConnection:
    """Small wrapper around a Neo4j driver that runs Cypher and returns the records.

    The driver is created in ``__init__`` and released by ``close()``.  The class
    is also a context manager, so the driver is closed even if a cell raises::

        with Neo4jConnection(uri, user, pwd) as conn:
            records = conn.query("MATCH (n) RETURN n LIMIT 1")
    """

    def __init__(self, uri, user, pwd):
        """Store the connection settings and try to create the driver.

        Args:
            uri: Connection URI handed to ``GraphDatabase.driver``.
            user: User name for the ``auth`` tuple.
            pwd: Password for the ``auth`` tuple.

        A failure to create the driver is printed, not raised; the driver then
        stays ``None`` and ``query`` refuses to run.
        """
        self.__uri = uri
        self.__user = user
        self.__pwd = pwd
        self.__driver = None
        try:
            self.__driver = GraphDatabase.driver(self.__uri, auth=(self.__user, self.__pwd))
        except Exception as e:
            print("Failed to create the driver:", e)

    def close(self):
        """Close the underlying driver if one was created."""
        if self.__driver is not None:
            self.__driver.close()

    def __enter__(self):
        """Return the connection itself so it can be bound with ``with ... as conn``."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the driver on leaving the ``with`` block; exceptions are not suppressed."""
        self.close()

    def query(self, query, db=None, parameters=None):
        """Run a Cypher query in a fresh session and return all records as a list.

        Args:
            query: Cypher text to run.
            db: Optional database name; when ``None`` the session is opened
                without a ``database`` argument.
            parameters: Optional mapping of query parameters (``$name``
                placeholders in ``query``), passed to ``session.run`` so values
                do not have to be formatted into the query string.

        Returns:
            A list of the records produced by the query, or ``None`` if opening
            the session or running the query raised (the error is printed).

        Raises:
            AssertionError: If the driver was never created.
        """
        assert self.__driver is not None, "Driver not initialized!"
        session = None
        response = None
        try:
            session = self.__driver.session(database=db) if db is not None else self.__driver.session()
            # Materialise inside the try block: the records are consumed here, before
            # the session is closed in `finally`.
            response = list(session.run(query, parameters))
        except Exception as e:
            print("Query failed:", e)
        finally:
            if session is not None:
                session.close()
        return response

In [20]:
# NOTE(review): the password is blank — presumably this instance allows
# unauthenticated access; confirm before pointing this at another server.
pw = ''
conn = Neo4jConnection(
    uri="bolt://robokopkg.renci.org:7687",
    user='neo4j',
    pwd=pw,
)

Failed to write data to connection IPv4Address(('robokopkg.renci.org', 7687)) (IPv4Address(('152.54.15.139', 7687)))


In [21]:
# Run the active cypher query over the Bolt connection.  `Neo4jConnection.query` returns a
# list of records, or None when the query failed (the error is printed, not raised).
record_list = conn.query(cypher)

In [22]:
# `conn.query` returns None when the query failed and an empty list when nothing matched.
# Stop here with a clear message instead of an opaque TypeError/IndexError from
# indexing into the result.
if not record_list:
    raise RuntimeError(
        "No records returned from the Bolt query; check the connection and the cypher query."
    )

# Column labels are read from the first record.
edges_nodes_keys = record_list[0].keys()

# Initializing a base dictionary containing results: one empty dict per column label.
base_dict = {key: {} for key in edges_nodes_keys}
print(base_dict)

{'edge_1': {}, 'edge_2': {}}


Data in records are returned with columns labeled for nodes and relationships/edges.

Node records contain all information about the object associated with a node, e.g. for Buprenorphine, contains label, SMILES, whether it's a receptor agonist or antagonist, etc. This is all in dictionary form.

Edge records contain lists of length 3.  The first and third items are the initial and terminal nodes of the relationship, respectively.  These two items are the same as what appears in the node records.  The second item is itself a two-element list: a string denoting the type of relationship, e.g. `biolink:affects`, followed by a dictionary of the edge's properties.

In [23]:
# Print each record as a readable path: one line per returned column, followed by that
# edge's properties.
for result_number, record in enumerate(record_list, start=1):
    print(f'Result: {result_number}\n')

    # Each column value is [start node, [relationship type, properties], end node].
    for label, data in record.data().items():
        start_node, relationship, end_node = data[0], data[1], data[2]
        print(f"{label}: {start_node['name']} -> {relationship[0]} -> {end_node['name']}")
        print(f"Edge properties: {relationship[1]}\n")

#pp.pprint(base_dict)

Result: 1

edge_1: Buprenorphine -> biolink:affects -> OPRM1
Edge properties: {'object_direction_qualifier': 'decreased', 'qualified_predicate': 'biolink:causes', 'biolink:primary_knowledge_source': 'infores:ctd', 'description': 'decreases molecular interaction with', 'NCBITaxon': '9606', 'object_aspect_qualifier': 'molecular_interaction', 'publications': ['PMID:21866885']}

edge_2: OPRM1 -> biolink:genetic_association -> Asterixis
Edge properties: {'biolink:primary_knowledge_source': 'DisGeNET', 'biolink:aggregator_knowledge_source': ['infores:pharos']}

Result: 2

edge_1: Buprenorphine -> biolink:affects -> OPRM1
Edge properties: {'object_direction_qualifier': 'increased', 'affinity_parameter': 'Ki', 'qualified_predicate': 'biolink:causes', 'biolink:primary_knowledge_source': 'infores:pharos', 'object_aspect_qualifier': 'activity', 'affinity': 8.29}

edge_2: OPRM1 -> biolink:genetic_association -> Asterixis
Edge properties: {'biolink:primary_knowledge_source': 'DisGeNET', 'biolink:ag

In [159]:
# import pandas as pd

# output_dict = base_dict
# json_str = json.dumps(output_dict, indent=4)
# df = pd.read_json(json_str)
# df.to_csv('output/results_cypher_robokopkg_renci.csv')

The second way to access the graph with Cypher is to send the query through the Automat interface (https://automat.renci.org/robokopkg/cypher), using the same Cypher query as above, submitted as a JSON payload.

In [9]:
# Endpoint that accepts a cypher query posted as a JSON body of the form {"query": <text>}.
AUTOMAT_CYPHER_URL = 'https://automat.renci.org/robokopkg/cypher'
# (connect, read) timeouts in seconds.  Without a timeout `requests` waits indefinitely
# on an unresponsive server, which would hang the notebook.
AUTOMAT_TIMEOUT = (10, 300)

j = {'query': cypher}
results = requests.post(AUTOMAT_CYPHER_URL, json=j, timeout=AUTOMAT_TIMEOUT)
print(results.status_code)

200


In [10]:
# Decode the response body, then read the column labels of the first result set once
# and reuse that value for the printout.
results_json = results.json()
column_names = results_json['results'][0]['columns']
print(column_names)

['edge_1', 'edge_2']


In [11]:
# Base results dictionary: one empty dict per returned column name.
base_dict = {key: {} for key in column_names}
print(base_dict)

{'edge_1': {}, 'edge_2': {}}


In [17]:
# Walk the automat response: every entry of `data` is one result, and the values in its
# `row` are paired positionally with `column_names`.
for result_number, result in enumerate(results_json['results'][0]['data'], start=1):
    print(f'Result: {result_number}\n')

    # zip pairs each value with its column label, replacing the hand-maintained index `j`
    # that overwrote the request payload also named `j`.
    for column_name, item in zip(column_names, result['row']):
        # Each item is [start node, [relationship type, properties], end node].
        print(f"{column_name}: {item[0]['name']} -> {item[1][0]} -> {item[2]['name']}")
        print(f"Edge properties: {item[1][1]}\n")

    # if result_number > 0:
    #     break

Result: 1

edge_1: Buprenorphine -> biolink:affects -> CYP2D6
Edge properties: {'object_direction_qualifier': 'decreased', 'qualified_predicate': 'biolink:causes', 'biolink:primary_knowledge_source': 'infores:ctd', 'description': 'decreases activity of', 'NCBITaxon': '9606', 'object_aspect_qualifier': 'activity', 'publications': ['PMID:12756210']}

edge_2: CYP2D6 -> biolink:genetic_association -> Tremor
Edge properties: {'biolink:primary_knowledge_source': 'DisGeNET', 'biolink:aggregator_knowledge_source': ['infores:pharos']}

Result: 2

edge_1: CYP2D6 -> biolink:affects -> Buprenorphine
Edge properties: {'object_direction_qualifier': 'increased', 'qualified_predicate': 'biolink:causes', 'biolink:primary_knowledge_source': 'infores:ctd', 'description': 'increases metabolic processing of', 'NCBITaxon': '9606', 'object_aspect_qualifier': 'metabolic_processing', 'publications': ['PMID:12756210']}

edge_2: CYP2D6 -> biolink:genetic_association -> Tremor
Edge properties: {'biolink:primary_k

In [None]:
# import pandas as pd

# output_dict = base_dict
# json_str = json.dumps(output_dict, indent=4)
# df = pd.read_json(json_str)
# df.to_csv('output/results_cypher_automat.csv')