In [4]:
# Endpoint that accepts Cypher queries for the ROBOKOP KG
automat_cypher_submit_url = 'https://automat.renci.org/robokopkg/cypher'

# Each run writes into its own timestamped folder under output/Cypher_automat
from datetime import datetime
from pathlib import Path

now = datetime.now()
dt_string = f"{now:%Y-%m-%d_%H%M%S}"
write_dir = Path("output") / "Cypher_automat" / dt_string
write_dir.mkdir(parents=True, exist_ok=True)

In [5]:
import requests
import json

In [6]:
import pprint
# Shared pretty-printer used by later cells to display the nested query results;
# indent=5 sets the indentation added per nesting level.
pp = pprint.PrettyPrinter(indent=5)

In addition to the methods using TRAPI, outlined in `HelloRobokop_TRAPI.ipynb` and `HelloRobokop_TRAPI_multiple_IDs.ipynb`, you can use the Cypher Query Language to talk to the graph.  An intro to using the Cypher Query Language can be found here: https://neo4j.com/developer/cypher/guide-cypher-basics/

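The example query below asks "Find me a Gene that is related to both `PUBCHEM.COMPOUND:644073` (Buprenorphine) and `HP:0001337` (Tremor)". Note that the query selects these two nodes by their `name` property (`Buprenorphine` and `Tremor`) rather than by their identifiers.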

In [7]:
# Buprenorphine -> [Gene] -> Tremor
# Plain (non-f) string on purpose: the query has no Python placeholders, and Cypher
# uses `{...}` for property maps, which an f-string would try to evaluate as Python.
cypher = """MATCH (n0_0:`biolink:ChemicalEntity`)-[r0_0]-(n1_0:`biolink:Gene`)-[r1_0]-(n2_0:`biolink:DiseaseOrPhenotypicFeature`) 
WHERE n0_0.name IN ['Buprenorphine'] AND n2_0.name IN ['Tremor'] 
RETURN [startNode(r0_0),[type(r0_0),properties(r0_0)],endNode(r0_0)] as edge_1, 
[startNode(r1_0),[type(r1_0),properties(r1_0)],endNode(r1_0)] as edge_2, 
[n0_0.name, n1_0.name, n2_0.name] as node_names LIMIT 100"""


The `MATCH` portion describes the structure of the query and names each node and edge. The first node is designated `n0_0` and has the label `biolink:ChemicalEntity`, which is one of the node types defined in the ROBOKOP KG.  Similarly, the second node is designated `n1_0` with the label `biolink:Gene`, and the third node is designated `n2_0` with the label `biolink:DiseaseOrPhenotypicFeature`.  Between the nodes are the edges, designated `r0_0` for the first edge and `r1_0` for the second edge. Because the edge patterns are written without an arrow (`-[r0_0]-`), edges in either direction are matched.

The `WHERE` piece restricts the answers returned to those where the name for `n0_0` is `Buprenorphine`  and the name for `n2_0` is `Tremor`. As currently written, this query could be expanded to allow more than one name for either `n0_0` or `n2_0` by adding additional names to the lists defined by the `[ ]` notation in the query. For example, the query could be expanded to include an additional name for `tremor` as follows: `n2_0.name IN ['Tremor', 'Asterixis']`.

Three columns are returned by the `RETURN` portion of this example.  The first, `edge_1`, contains the start node, the edge type and edge properties, and the end node of the first edge.  The second, `edge_2`, contains the same information for the second edge.  The third, `node_names`, contains the names of the three nodes.

Any adjustments made to the `RETURN` section will require further adjustments downstream when extracting results in the sections below.

To access the graph using Cypher, queries are sent through the automat interface (https://automat.renci.org/robokopkg/cypher) in JSON format, with the query text placed in an explicitly labeled `query` field.

The data is returned in JSON format and can be extracted by indexing into the parsed response.  Each row is returned as a list of values in column order; the column labels are not repeated for every row but are listed once under `columns`.

In [9]:
# The automat endpoint expects a JSON body whose `query` field holds the Cypher text.
j = {'query': cypher}
# Bound the wait as (connect, read) seconds so a stalled connection cannot hang the
# notebook forever; requests applies no timeout unless one is given.
results = requests.post(automat_cypher_submit_url, json=j, timeout=(10, 300))
print(results.status_code)
# Stop here with the HTTP error instead of failing later on a missing key.
results.raise_for_status()
results_json = results.json()
# Column labels are reported once; every data row lists its values in this order.
column_names = results_json['results'][0]['columns']
print(column_names)

200
['edge_1', 'edge_2', 'node_names']


In [10]:
# Show only the first returned row; its entries follow the column order printed
# above (edge_1, edge_2, node_names).
pp.pprint(results_json['results'][0]['data'][0]['row'])

[    [    {    'CHEBI_ROLE_agonist': True,
               'CHEBI_ROLE_analgesic': True,
               'CHEBI_ROLE_antagonist': True,
               'CHEBI_ROLE_delta_opioid_agent': True,
               'CHEBI_ROLE_delta_opioid_receptor_antagonist': True,
               'CHEBI_ROLE_drug': True,
               'CHEBI_ROLE_kappa_opioid_agent': True,
               'CHEBI_ROLE_kappa_opioid_receptor_antagonist': True,
               'CHEBI_ROLE_mu_opioid_agent': True,
               'CHEBI_ROLE_mu_opioid_receptor_agonist': True,
               'CHEBI_ROLE_neurotransmitter_agent': True,
               'CHEBI_ROLE_opioid_agent': True,
               'CHEBI_ROLE_opioid_analgesic': True,
               'CHEBI_ROLE_opioid_receptor_agonist': True,
               'CHEBI_ROLE_opioid_receptor_antagonist': True,
               'CHEBI_ROLE_pharmaceutical': True,
               'CHEBI_ROLE_pharmacological_role': True,
               'alogs': -4.44,
               'arom_c': 6,
               'cd_formul

The code below extracts results based on the structure of the original cypher query in the section above.  Any changes to the `RETURN` part of the query will require adjustments to the code below.

In [11]:
import os
from collections import Counter

# Each entry of `data` carries one result row laid out as [edge_1, edge_2, node_names].
result_rows = [result['row'] for result in results_json['results'][0]['data']]
if not result_rows:
    # Without this guard the file-name step below fails with a bare IndexError.
    raise ValueError("The Cypher query returned no rows, so there is nothing to summarize.")

# Keep every distinct edge once. An edge is identified by its readable label plus its
# properties, so edges sharing subject/predicate/object but differing in properties
# are counted separately. The dict keeps first-seen order and makes the lookup O(1).
unique_edges = {}
for row in result_rows:
    # Only the two edge columns are walked; each is [start node, [type, properties], end node].
    for start_node, (edge_type, edge_properties), end_node in row[0:2]:
        edge_label = f"{start_node['name']} -> {edge_type} -> {end_node['name']}"
        unique_edges.setdefault((edge_label, str(edge_properties)), edge_label)

# File-name stem built from the node names of the first row, with spaces replaced by "_".
combined_node_list = "_".join(result_rows[0][2]).replace(" ", "_")
print(combined_node_list)

# One label per distinct edge, properties dropped.
string_out_list = list(unique_edges.values())

# Number of distinct edges found for each "subject -> predicate -> object" label.
string_out_dict = dict(Counter(string_out_list))
pp.pprint(string_out_dict)

with open(os.path.join(write_dir, combined_node_list + ".txt"), 'w', encoding='utf-8') as convert_file:
    json.dump(string_out_dict, convert_file)

Buprenorphine_CYP2D6_Tremor
{    'Buprenorphine -> biolink:affects -> CYP2D6': 1,
     'Buprenorphine -> biolink:directly_physically_interacts_with -> CYP2D6': 1,
     'Buprenorphine -> biolink:regulates -> CYP2D6': 1,
     'CYP2D6 -> biolink:affects -> Buprenorphine': 1,
     'CYP2D6 -> biolink:genetic_association -> Tremor': 2}


An additional notebook containing information about other access methods using Cypher, including through the ROBOKOP KG, can be found in `HelloRobokop_RobokopKG.ipynb`.