In [43]:
import requests
import json

In [46]:
import pprint
# Shared pretty-printer (5-space indent) used later to display the nested result dictionary.
pp = pprint.PrettyPrinter(indent=5)

You can also bypass TRAPI entirely and query the graph directly with Cypher.  An introduction to the Cypher query language is available in the [Neo4j getting-started guide](https://neo4j.com/docs/getting-started/current/cypher-intro/).

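There are two ways to access the graph with Cypher.  The first is the Neo4j instance at http://robokopkg.renci.org, which provides a Cypher browser and also accepts queries sent programmatically (shown below over Bolt).  The second is the Automat HTTP interface, shown later in this notebook.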

Additionally, cypher queries can be generated by clicking the `Copy` button from the ExEmPLAR tool after setting up a query pattern: https://www.exemplar.mml.unc.edu/

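The example query below asks "Find me a Gene that is related to both `PUBCHEM.COMPOUND:644073` (Buprenorphine) and `HP:0001337` (Tremor)".  Note that the query selects the two end nodes by their `name` property (`'Buprenorphine'` and `'Tremor'`) rather than by identifier.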

In [44]:
# Cypher pattern: ChemicalEntity -- Gene -- DiseaseOrPhenotypicFeature, with the
# two end nodes pinned by name.  `RETURN *` returns every bound variable
# (n0_0, n1_0, n2_0, r0_0, r1_0), capped at 100 rows.
#
# Variations that are handy while exploring:
#   * smoke test:            MATCH (a:`biolink:Gene`) RETURN a LIMIT 1
#   * gene identifiers only: replace `RETURN *` with `RETURN n1_0.id AS identifier`
#
# A plain string is used (not an f-string) because nothing is interpolated.
cypher = (
    "MATCH (n0_0:`biolink:ChemicalEntity`)-[r0_0]-(n1_0:`biolink:Gene`)"
    "-[r1_0]-(n2_0:`biolink:DiseaseOrPhenotypicFeature`) "
    "WHERE n0_0.name IN ['Buprenorphine'] AND n2_0.name IN ['Tremor'] "
    "RETURN * LIMIT 100"
)

The first approach uses the Bolt protocol.  After the `Neo4jConnection` helper class is defined, the query is sent to the Neo4j instance at `bolt://robokopkg.renci.org:7687`.

In [54]:
from neo4j import GraphDatabase
class Neo4jConnection:
    """Small convenience wrapper around a Neo4j driver.

    The wrapper creates the driver once, runs Cypher queries in short-lived
    sessions, and reports failures by printing them instead of raising.
    It can be used as a context manager so the driver is always closed:

        with Neo4jConnection(uri, user, pwd) as conn:
            records = conn.query("MATCH (n) RETURN n LIMIT 1")
    """

    def __init__(self, uri, user, pwd):
        """Create a driver for ``uri`` authenticated with ``(user, pwd)``.

        If the driver cannot be created the error is printed and the instance
        is left without a driver; ``query`` will then fail its assertion.
        """
        self.__uri = uri
        self.__user = user
        self.__pwd = pwd
        self.__driver = None
        try:
            self.__driver = GraphDatabase.driver(self.__uri, auth=(self.__user, self.__pwd))
        except Exception as e:
            print("Failed to create the driver:", e)

    def __enter__(self):
        """Return the connection itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the driver on leaving the ``with`` block; exceptions propagate."""
        self.close()

    def close(self):
        """Close the underlying driver, if one was created."""
        if self.__driver is not None:
            self.__driver.close()

    def query(self, query, db=None, parameters=None):
        """Run a Cypher query and return its records as a list.

        Args:
            query: Cypher text to execute.
            db: Optional database name; the driver's default is used when None.
            parameters: Optional dict of Cypher parameters (``$name`` placeholders).
                Prefer this over building values into the query string.

        Returns:
            A list of the records produced by the query, or ``None`` if the
            query failed (the error is printed, not raised).

        Raises:
            AssertionError: If the driver was never initialised.
        """
        assert self.__driver is not None, "Driver not initialized!"
        session = None
        response = None
        try:
            session = self.__driver.session(database=db) if db is not None else self.__driver.session()
            # Materialise the result while the session is still open.
            response = list(session.run(query, parameters))
        except Exception as e:
            print("Query failed:", e)
        finally:
            if session is not None:
                session.close()
        return response

In [56]:
# Open a Bolt connection to the ROBOKOP Neo4j instance.
# NOTE(review): `pw` is not defined in any cell shown here; it must be set before
# this cell runs (preferably read from an environment variable or getpass rather
# than hardcoded in the notebook).
conn = Neo4jConnection(uri="bolt://robokopkg.renci.org:7687", user = 'neo4j', pwd = pw)

In [65]:
# Run the Cypher query over Bolt. `query` returns a list of records, or None when
# the query failed (the error is printed rather than raised).
record_list = conn.query(cypher)

In [144]:
# Column names (node and relationship variables) are taken from the first record.
edges_nodes_keys = record_list[0].keys()

# Seed the results dictionary with one empty sub-dictionary per column.
base_dict = {column: {} for column in edges_nodes_keys}
print(base_dict)

{'n0_0': {}, 'n1_0': {}, 'n2_0': {}, 'r0_0': {}, 'r1_0': {}}


Data in records are returned with columns labeled for nodes and relationships/edges.

Node records contain all information about the object associated with a node, e.g. for Buprenorphine, contains label, SMILES, whether it's a receptor agonist or antagonist, etc. This is all in dictionary form.

Edge records contain tuples of length 3.  The first and third items in the tuple are for the initial and terminal nodes, respectively.  These two items are the same as what appears in the node records.  The second item of the tuple is a string denoting the type of relationship, e.g. `biolink:affects`

In [157]:
# Flatten every record into base_dict, keyed by the record's position in the
# result list: node columns keep the node's name, relationship columns keep the
# relationship type (the middle element of the 3-tuple described above).
# enumerate() supplies the row index, so each record gets its own slot instead
# of every record overwriting index 0.
for i, record in enumerate(record_list):
    for label, value in record.data().items():
        # Column names follow the query's variable names: n* = node, r* = relationship.
        if label.startswith('n'):
            base_dict[label][i] = value['name']
        elif label.startswith('r'):
            base_dict[label][i] = value[1]

pp.pprint(base_dict)

{    'n0_0': {    0: 'Buprenorphine',
                  1: 'Buprenorphine',
                  2: 'Buprenorphine',
                  3: 'Buprenorphine',
                  4: 'Buprenorphine',
                  5: 'Buprenorphine'},
     'n1_0': {    0: 'CYP2D6',
                  1: 'CYP2D6',
                  2: 'CYP2D6',
                  3: 'CYP2D6',
                  4: 'CYP2D6',
                  5: 'CYP2D6'},
     'n2_0': {    0: 'Tremor',
                  1: 'Tremor',
                  2: 'Tremor',
                  3: 'Tremor',
                  4: 'Tremor',
                  5: 'Tremor'},
     'r0_0': {    0: 'biolink:affects',
                  1: 'biolink:affects',
                  2: 'biolink:directly_physically_interacts_with',
                  3: 'biolink:affects',
                  4: 'biolink:affects',
                  5: 'biolink:directly_physically_interacts_with'},
     'r1_0': {    0: 'biolink:genetic_association',
                  1: 'biolink:genetic_association'

In [159]:
import pandas as pd
from pathlib import Path

# Destination for the Bolt results; create the folder first so to_csv does not
# fail on a fresh checkout where `output/` does not exist yet.
results_path = Path('output/results_cypher_robokopkg_renci.csv')
results_path.parent.mkdir(parents=True, exist_ok=True)

# base_dict maps column -> {row index -> value}, which pandas turns into a
# table directly.  This avoids serialising to JSON and parsing it back
# (passing a literal JSON string to pd.read_json is deprecated in recent pandas).
output_dict = base_dict
df = pd.DataFrame(output_dict)
df.to_csv(results_path)

The second approach sends the same Cypher query to the Automat interface (https://automat.renci.org/robokopkg/cypher) in an HTTP POST request, with the query text supplied as the `query` field of a JSON payload.

In [168]:
# JSON payload for the Automat Cypher endpoint: the query text under the 'query' key.
j = {'query': cypher}

# requests waits indefinitely unless a timeout is given; allow 10 s to connect
# and up to 10 minutes for the query to return.
results = requests.post('https://automat.renci.org/robokopkg/cypher', json=j, timeout=(10, 600))
print(results.status_code)

# Fail here, with the HTTP error, rather than later when the body is parsed.
results.raise_for_status()

200


In [None]:
# Decode the response body and pull out the column names of the first result set.
results_json = results.json()
column_names = results_json['results'][0]['columns']
print(column_names)

In [39]:
# Initializing a base dictionary containing results
base_dict = {}
for key in column_names:
    base_dict[key] = {}
print(base_dict)

{'n0_0': {}, 'n1_0': {}, 'n2_0': {}, 'r0_0': {}, 'r1_0': {}}


In [187]:
# Flatten every result row into base_dict as column -> {row index -> value}.
# Only the first PREVIEW_COUNT rows are echoed, because a full row (with all
# node properties) is very long.
PREVIEW_COUNT = 1

for i, result in enumerate(results_json['results'][0]['data']):
    show = i < PREVIEW_COUNT
    if show:
        print(f'\nResult: {i+1}')
        print(result['row'])

    # Values in a row are positional and line up with column_names.
    for column, item in zip(column_names, result['row']):
        if column.startswith('n'):
            item_to_add = item['name']
        elif column.startswith('r'):
            # NOTE(review): the relationship entry is stored as returned. An earlier
            # attempt read item['type'] (it was commented out) -- confirm which field
            # of the entry holds the predicate and narrow this down if needed.
            item_to_add = item
        else:
            print(f'{column} not found')
            continue

        if show:
            print(f'\n{column}')
            print(item_to_add)

        # Store per row index so later rows do not replace earlier ones.
        base_dict[column][i] = item_to_add

# print(f"Number of results found: {len(results_json['results'][0]['data'])}")


Result: 1
[{'CHEBI_ROLE_delta_opioid_agent': True, 'smiles': 'CO[C@]12CC[C@@]3(C[C@@H]1[C@](C)(O)C(C)(C)C)[C@H]1CC4=CC=C(O)C5=C4[C@@]3(CCN1CC1CC1)[C@H]2O5', 'fda_labels': 74, 'rgb': 28, 'CHEBI_ROLE_analgesic': True, 'CHEBI_ROLE_opioid_agent': True, 'sp2_c': 0, 'sp3_c': 23, 'CHEBI_ROLE_antagonist': True, 'CHEBI_ROLE_agonist': True, 'CHEBI_ROLE_opioid_analgesic': True, 'cd_formula': 'C29H41NO4', 'alogs': -4.44, 'CHEBI_ROLE_opioid_receptor_agonist': True, 'id': 'PUBCHEM.COMPOUND:644073', 'CHEBI_ROLE_drug': True, 'CHEBI_ROLE_neurotransmitter_agent': True, 'lipinski': 0, 'rotb': 5, 'o_n': 5, 'halogen': 0, 'CHEBI_ROLE_mu_opioid_agent': True, 'cd_molweight': 467.65, 'CHEBI_ROLE_delta_opioid_receptor_antagonist': True, 'CHEBI_ROLE_pharmaceutical': True, 'clogp': 3.99, 'oh_nh': 2, 'hetero_sp2_c': 0, 'equivalent_identifiers': ['HMDB:HMDB0015057', 'DRUGBANK:DB00921', 'INCHIKEY:RMRJXGBAOAMLHD-IHFGGWKQSA-N', 'UMLS:C0006405', 'CHEBI:3216', 'MESH:D002047', 'UNII:40D3SCR4GZ', 'CHEMBL.COMPOUND:CHEMBL5

In [None]:
import pandas as pd
from pathlib import Path

# Destination for the Automat results; create the folder first so to_csv does
# not fail on a fresh checkout where `output/` does not exist yet.
results_path = Path('output/results_cypher_automat.csv')
results_path.parent.mkdir(parents=True, exist_ok=True)

# base_dict is expected to map column -> {row index -> value}, which pandas
# turns into a table directly.  This avoids serialising to JSON and parsing it
# back (passing a literal JSON string to pd.read_json is deprecated in recent pandas).
output_dict = base_dict
df = pd.DataFrame(output_dict)
df.to_csv(results_path)