In [12]:
# Connection settings for the ROBOKOP Neo4j bolt endpoint
robokopkg_bolt_url = 'bolt://robokopkg.renci.org:7687'
user = "neo4j"
pw = ""

# Each run writes into its own timestamped folder under output/Cypher_robokopkg
from datetime import datetime
from pathlib import Path

now = datetime.now()
dt_string = f"{now:%Y-%m-%d_%H%M%S}"
write_dir = Path("output/Cypher_robokopkg") / dt_string
write_dir.mkdir(exist_ok=True, parents=True)

# Initializing Neo4j connection class
from neo4j import GraphDatabase
class Neo4jConnection:
    """Small helper around a Neo4j driver for running Cypher queries.

    Errors while creating the driver or running a query are reported with
    ``print`` instead of being raised, so a notebook keeps running; callers
    must therefore check ``query()`` results for ``None``.

    The object can also be used as a context manager, in which case the
    driver is closed automatically when the ``with`` block exits.
    """

    def __init__(self, uri, user, pwd):
        """Create a driver for ``uri`` authenticated with ``user`` / ``pwd``.

        If driver creation fails the error is printed and the driver stays
        ``None``; a later ``query()`` call then raises ``AssertionError``.
        """
        self.__uri = uri
        self.__user = user
        self.__pwd = pwd
        self.__driver = None
        try:
            self.__driver = GraphDatabase.driver(self.__uri, auth=(self.__user, self.__pwd))
        except Exception as e:
            print("Failed to create the driver:", e)

    def __enter__(self):
        """Return the connection itself so it can be bound by ``with ... as``."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the driver on leaving the ``with`` block; never suppress errors."""
        self.close()
        return False

    def close(self):
        """Close the underlying driver if one was created."""
        if self.__driver is not None:
            self.__driver.close()

    def query(self, query, db=None, parameters=None):
        """Run a Cypher query and return its records as a list.

        Args:
            query: Cypher query text.
            db: Optional database name; when ``None`` the driver's default
                session is used.
            parameters: Optional dict of query parameters (``$name``
                placeholders in ``query``). Prefer this over formatting
                values into the query string.

        Returns:
            A list of the records produced by the query, or ``None`` if the
            query failed (the error is printed, not raised).

        Raises:
            AssertionError: if the driver could not be created.
        """
        # Raised explicitly (rather than via `assert`) so the guard still
        # runs under `python -O`; the exception type is unchanged.
        if self.__driver is None:
            raise AssertionError("Driver not initialized!")
        session = None
        response = None
        try:
            session = self.__driver.session(database=db) if db is not None else self.__driver.session()
            # Materialize the result before the session is closed.
            response = list(session.run(query, parameters))
        except Exception as e:
            print("Query failed:", e)
        finally:
            if session is not None:
                session.close()
        return response

In [13]:
import requests
import json

In [14]:
import pprint
# Shared pretty-printer (5-space indent) used by later cells to display nested results
pp = pprint.PrettyPrinter(indent=5)

## Cypher - robokopkg.renci.org

ROBOKOP can also be queried directly at `robokopkg.renci.org`, as an alternative to the method outlined in `HelloRobokop_Cypher.ipynb`.  Here, a Cypher query is sent to `bolt://robokopkg.renci.org:7687` using the Bolt protocol.  The helper `Neo4jConnection` class defined above is used to send the query, and the results are extracted below.

In [15]:
# Buprenorphine -> [Gene] -> Tremor
# Two-hop pattern (relationships matched in either direction):
#   ChemicalEntity - Gene - DiseaseOrPhenotypicFeature
# Each row returns both edges as [start node, [type, properties], end node]
# plus the three node names.
# Deliberately a plain (non-f) string: nothing is interpolated, and an f-string
# would treat any Cypher `{...}` map literal added later as a Python placeholder.
cypher = """MATCH (n0_0:`biolink:ChemicalEntity`)-[r0_0]-(n1_0:`biolink:Gene`)-[r1_0]-(n2_0:`biolink:DiseaseOrPhenotypicFeature`)
WHERE n0_0.name IN ['Buprenorphine'] AND n2_0.name IN ['Tremor']
RETURN [startNode(r0_0),[type(r0_0),properties(r0_0)],endNode(r0_0)] as edge_1,
[startNode(r1_0),[type(r1_0),properties(r1_0)],endNode(r1_0)] as edge_2,
[n0_0.name, n1_0.name, n2_0.name] as node_names LIMIT 100"""

In [16]:
# Open the connection and run the query.
conn = Neo4jConnection(uri=robokopkg_bolt_url, user=user, pwd=pw)
record_list = conn.query(cypher)

# Neo4jConnection.query() prints errors and returns None instead of raising,
# and a query with no matches returns an empty list. Either way the cells
# below would fail with an opaque TypeError/IndexError on record_list[0],
# so stop here with a clear message instead.
if not record_list:
    conn.close()
    raise RuntimeError(
        "Cypher query failed or returned no records; see any message printed above."
    )


A list of records is returned from the query. The structure of each record is defined by the `RETURN` section of the query above.
```
<Record edge_1=[<Node element containing properties for first node of r0_0>,
                 [list containing the type and properties for the edge],
                 <Node element containing properties for second node of r0_0>]
         edge_2=[<Node element containing properties for first node of r1_0>,
                 [list containing the type and properties for the edge],
                 <Node element containing properties for second node of r1_0>]
         node_names=[list of node names]>
```

In [17]:
# Look at the first returned record to see the raw structure of a result row
record = record_list[0]
print(record)

<Record edge_1=[<Node element_id='8421444' labels=frozenset({'biolink:NamedThing', 'biolink:PhysicalEssenceOrOccurrent', 'biolink:ChemicalOrDrugOrTreatment', 'biolink:ChemicalEntity', 'biolink:Entity', 'biolink:PhysicalEssence', 'biolink:ChemicalEntityOrProteinOrPolypeptide', 'biolink:SmallMolecule', 'biolink:MolecularEntity', 'biolink:ChemicalEntityOrGeneOrGeneProduct'}) properties={'CHEBI_ROLE_delta_opioid_agent': True, 'smiles': 'CO[C@]12CC[C@@]3(C[C@@H]1[C@](C)(O)C(C)(C)C)[C@H]1CC4=CC=C(O)C5=C4[C@@]3(CCN1CC1CC1)[C@H]2O5', 'description': 'A morphinane alkaloid that is 7,8-dihydromorphine 6-O-methyl ether in which positions 6 and 14 are joined by a -CH2CH2- bridge, one of the hydrogens of the N-methyl group is substituted by cyclopropyl, and a hydrogen at position 7 is substituted by a 2-hydroxy-3,3-dimethylbutan-2-yl group. It is highly effective for the treatment of opioid use disorder and is also increasingly being used in the treatment of chronic pain.', 'fda_labels': 74, 'rgb': 

The data in a record can be accessed using the `data()` method. Known keys can be passed to `data()`, but calling it with no arguments returns everything as a dictionary. The keys are the aliases given in the `RETURN` section of the original query (`edge_1`, `edge_2`, and `node_names`).

In [18]:
# data() with no arguments returns every field of the record as a dictionary
record_data = record.data()
pp.pprint(record_data)

{    'edge_1': [    {    'CHEBI_ROLE_agonist': True,
                         'CHEBI_ROLE_analgesic': True,
                         'CHEBI_ROLE_antagonist': True,
                         'CHEBI_ROLE_delta_opioid_agent': True,
                         'CHEBI_ROLE_delta_opioid_receptor_antagonist': True,
                         'CHEBI_ROLE_drug': True,
                         'CHEBI_ROLE_kappa_opioid_agent': True,
                         'CHEBI_ROLE_kappa_opioid_receptor_antagonist': True,
                         'CHEBI_ROLE_mu_opioid_agent': True,
                         'CHEBI_ROLE_mu_opioid_receptor_agonist': True,
                         'CHEBI_ROLE_neurotransmitter_agent': True,
                         'CHEBI_ROLE_opioid_agent': True,
                         'CHEBI_ROLE_opioid_analgesic': True,
                         'CHEBI_ROLE_opioid_receptor_agonist': True,
                         'CHEBI_ROLE_opioid_receptor_antagonist': True,
                         'CHEBI_ROLE_pha

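Results are extracted and stored in the format of subject -> predicate -> object, followed by the remaining edge properties. Including the edge properties helps to distinguish edges that have the same subject, predicate, and object. Unique entries are appended to a list; the edge properties are then dropped, so the count for each subject -> predicate -> object entry is the number of distinct edges behind it. The counts are then written to a text file.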

The code below extracts results based on the structure of the original cypher query in the section above.  Any changes to the `RETURN` part of the query will require adjustments to the code below.

In [19]:
import os
from collections import Counter


def summarize_edges(records, edge_keys=("edge_1", "edge_2")):
    """Return one "label - subject -> predicate -> object" string per distinct edge.

    Each record's ``data()`` is expected to hold, under every key in
    ``edge_keys``, a list of the form
    ``[start node properties, [edge type, edge properties], end node properties]``
    as produced by the ``RETURN`` clause of the query above. The ``node_names``
    entry is intentionally not read here.

    Two edges are treated as the same only when their label, endpoints,
    predicate AND edge properties all match. The properties are used for
    de-duplication only and are left out of the returned strings, so the same
    string appears once per distinct edge, in first-seen order.

    Working inside a function also keeps the notebook-level ``record`` and
    ``record_data`` variables from being overwritten by the loop.
    """
    distinct_edges = {}  # dict used as an insertion-ordered set
    for rec in records:
        rec_data = rec.data()
        for label in edge_keys:
            subject, (predicate, properties), obj = rec_data[label]
            summary = f"{label} - {subject['name']} -> {predicate} -> {obj['name']}"
            # str(properties) makes the (unhashable) property dict usable as a key
            distinct_edges[(summary, str(properties))] = None
    return [summary for summary, _ in distinct_edges]


string_out_list = summarize_edges(record_list)

# Node names of the first record, used later as the output file name
combined_node_list = "_".join(record_list[0].data('node_names')['node_names'])
print(combined_node_list)

# Number of distinct edges behind each subject -> predicate -> object string
string_out_dict = dict(Counter(string_out_list))
pp.pprint(string_out_dict)

Buprenorphine_CYP2D6_Tremor
{    'edge_1 - Buprenorphine -> biolink:affects -> CYP2D6': 1,
     'edge_1 - Buprenorphine -> biolink:directly_physically_interacts_with -> CYP2D6': 1,
     'edge_1 - Buprenorphine -> biolink:regulates -> CYP2D6': 1,
     'edge_1 - CYP2D6 -> biolink:affects -> Buprenorphine': 1,
     'edge_2 - CYP2D6 -> biolink:genetic_association -> Tremor': 2}


Writing results below after confirming that the output looks good.

In [20]:
# Save the edge counts as JSON text in this run's output directory
out_path = os.path.join(write_dir, f"{combined_node_list}.txt")
with open(out_path, mode='w') as convert_file:
    json.dump(string_out_dict, convert_file)

Close the Cypher connection when finished.

In [21]:
# Closes the underlying Neo4j driver (no-op if the driver was never created)
conn.close()