In [12]:
# Parameter inputs
URL_node_normalizer = 'https://nodenormalization-sri.renci.org/get_normalized_nodes'
trapi_submit_url = "http://robokop-automat.apps.renci.org/robokopkg/1.4/query"

def URL_name_resolution_search(search_string):
    return(f'https://name-resolution-sri.renci.org/lookup?string={search_string}&offset=0&limit=10')

# Initializing directory to write
from datetime import datetime
from pathlib import Path

now = datetime.now()
dt_string = now.strftime("%Y-%m-%d_%H%M%S")
write_dir = Path("output/TRAPI",str(dt_string))
write_dir.mkdir(parents=True, exist_ok=True)

In [2]:
import requests
import json

TRAPI Documentation: https://github.com/NCATSTranslator/ReasonerAPI

Setup of the query below and details about TRAPI can be found in either the `HelloRobokop.ipynb` or `HelloRobokop_TRAPI.ipynb` notebooks.  Steps in this notebook below can also be used with ARAGORN as shown in either of the previous notebooks.

For a researcher who is starting from a `name` who wants to use TRAPI, they can use the Name Resolver tool to get a list of identifers for the nodes.  For example, finding other IDs related to `Buprenorphine` is below.

In [3]:
results = requests.post(URL_name_resolution_search('Buprenorphine'))
results_json = results.json()
#print(json.dumps(results_json,indent=4))

In [4]:
import pprint
pp = pprint.PrettyPrinter(indent=5)

In [8]:
# print(results_json)
input_node_id_list = []
for result in results_json:
    id = result['curie']
    if id not in input_node_id_list:
        input_node_id_list.append(id)

print(f"IDs related to 'Buprenorphine': {input_node_id_list}")


IDs related to 'Buprenorphine': ['PUBCHEM.COMPOUND:644073', 'PUBCHEM.COMPOUND:165429853', 'UMLS:C0615670', 'UMLS:C0524040', 'PUBCHEM.COMPOUND:57383391', 'UMLS:C3839045', 'UMLS:C1169989', 'PUBCHEM.COMPOUND:9811785', 'UMLS:C2827706', 'UMLS:C0572062']


For confirmation of the labels for each of these IDs, the Node Normalizer tool can be used to show contents.

In [9]:
nn_query = {
  "curies": input_node_id_list,
  "conflate": True
}
results = requests.post(URL_node_normalizer,json=nn_query)
pp.pprint(results.json())

{    'PUBCHEM.COMPOUND:165429853': {    'equivalent_identifiers': [    {    'identifier': 'PUBCHEM.COMPOUND:165429853',
                                                                            'label': '7-Dehydroxy '
                                                                                     'Buprenorphine '
                                                                                     '(Buprenorphine '
                                                                                     'Impurity '
                                                                                     'F)'}],
                                        'id': {    'identifier': 'PUBCHEM.COMPOUND:165429853',
                                                   'label': '7-Dehydroxy '
                                                            'Buprenorphine '
                                                            '(Buprenorphine '
                                                            

This query asks "Find me a Biological Process or Activity, or a Gene, or a Pathway that is related to both `HP:0001337` (Tremor) and any of the IDs related to `Buprenorphine` found searching the Name Resolver.  Note that the IDs submitted are inclusive and don't restrict the pathways.  For example, if submitting a list with IDs for `Buprenorphine` and `Brixadi`, the query searches for both individually, rather than looking for a pathway that has both `Buprenorphine` and `Brixadi` included.  See `HelloRobokop_Name_Tools.ipynb` for more details.

In [10]:
query={
    "message": {
      "query_graph": {
        "edges": {
          "e00": {
            "subject": "n00",
              "object": "n01",
          "predicates":["biolink:related_to"]
          },
          "e01": {
            "subject": "n01",
              "object": "n02",
          "predicates":["biolink:related_to"]
          }
        },
        "nodes": {
          "n00": {
            "ids": input_node_id_list,
            "categories": ["biolink:ChemicalEntity"]
          },
          "n01": {
              "categories": ["biolink:BiologicalProcessOrActivity","biolink:Gene","biolink:Pathway"]
          },
          "n02": {
            "ids": ["HP:0001337"],
            "categories": ["biolink:DiseaseOrPhenotypicFeature"]
          }
        }
      }
    }
  }

Like in `HelloRobokop_TRAPI.ipynb`, the TRAPI query can be sent to automat like below.  Because more search terms are being added to the query, more results are expected.  In the example with searching for some `gene` being related to `Buprenorphine` or `Tremors`, seven results were returned.  Now we are searching for some `gene` related to `Tremors` and any of the terms in a whole list.  In this example, eight results are returned below instead of seven.

In [13]:
response = requests.post(trapi_submit_url,json=query)
print(response.status_code)
number_pathway_results = len(response.json()['message']['results'])
print(len(response.json()['message']['results']))

200
8


Being a TRAPI query, the response follows the same format described in `HelloRobokop_TRAPI.ipynb`, here omitted.

The code below writes out all of the pathway results returned, NOT the edges for each pathway.

The additional result found in this example is shown along with the seven `Buprenorphine` results.  Here, the additional result is `Brixadi` -> `OPRM1` -> `Asterixis`

In [14]:
import pandas as pd
import os

kg = response.json()['message']['knowledge_graph']
cols = []
for node in sorted(response.json()['message']['results'][0]['node_bindings'].keys()):
    cols.append(node)
    cols.append(node + '_name')
results_df = pd.DataFrame(columns = cols)

results_list = []
for result in response.json()['message']['results']:
    result_dict = {}
    for node in sorted(result['node_bindings'].keys()):
        node_id = result['node_bindings'][node][0]['id']
        result_dict[node] = node_id
        result_dict[node + '_name'] = kg['nodes'][node_id]['name']
    # print(result_dict)

    results_list.append(pd.DataFrame([result_dict]))
results_df = pd.concat(results_list)
display(results_df)
results_df.to_csv(os.path.join(write_dir,'results_TRAPI_multi_ID.csv'), index=False)

combined_node_list = ["_".join([row[1].replace(" ", "_"), row[3].replace(" ", "_"), row[5].replace(" ", "_")]) for row in results_df[cols].to_numpy()]
pp.pprint(combined_node_list)

Unnamed: 0,n00,n00_name,n01,n01_name,n02,n02_name
0,PUBCHEM.COMPOUND:644073,Buprenorphine,NCBIGene:1565,CYP2D6,HP:0025387,Pill-rolling tremor
0,PUBCHEM.COMPOUND:644073,Buprenorphine,NCBIGene:1565,CYP2D6,HP:0200085,Limb tremor
0,PUBCHEM.COMPOUND:9811785,Buprenorphine(+),NCBIGene:4988,OPRM1,HP:0012164,Asterixis
0,PUBCHEM.COMPOUND:644073,Buprenorphine,NCBIGene:4988,OPRM1,HP:0012164,Asterixis
0,PUBCHEM.COMPOUND:644073,Buprenorphine,NCBIGene:1565,CYP2D6,HP:0001337,Tremor
0,PUBCHEM.COMPOUND:644073,Buprenorphine,NCBIGene:1565,CYP2D6,HP:0002345,Action tremor
0,PUBCHEM.COMPOUND:644073,Buprenorphine,NCBIGene:1565,CYP2D6,HP:0002322,Resting tremor
0,PUBCHEM.COMPOUND:644073,Buprenorphine,NCBIGene:1565,CYP2D6,HP:0002174,Postural tremor


[    'Buprenorphine_CYP2D6_Pill-rolling_tremor',
     'Buprenorphine_CYP2D6_Limb_tremor',
     'Buprenorphine(+)_OPRM1_Asterixis',
     'Buprenorphine_OPRM1_Asterixis',
     'Buprenorphine_CYP2D6_Tremor',
     'Buprenorphine_CYP2D6_Action_tremor',
     'Buprenorphine_CYP2D6_Resting_tremor',
     'Buprenorphine_CYP2D6_Postural_tremor']


The following writes out each unique edge for each of the pathways in the format of `subject` -> `predicate` -> `object` along with the count of how many times a given edge was present in the results. The latter corresponds to the number of different primary sources that included that edge.

In [16]:
from collections import Counter
import json
import pprint
pp = pprint.PrettyPrinter(indent=5)

for i in range(number_pathway_results):
    print(f"Pathway result: {combined_node_list[i]}")
    edge_bindings = response.json()['message']['results'][i]['analyses'][0]['edge_bindings']

    edge_ids = []
    for edge_name, edge_list in edge_bindings.items():
        edge_ids.append({edge_name: [x['id'] for x in edge_list]})

    string_out_list = []
    for edge_dict in edge_ids:
        for edge_name, edge_list in edge_dict.items():
            for edge_id in edge_list:
                subject_id = response.json()['message']['knowledge_graph']['edges'][edge_id]['subject']
                subject = response.json()['message']['knowledge_graph']['nodes'][subject_id]['name']
                predicate = response.json()['message']['knowledge_graph']['edges'][edge_id]['predicate']
                object_id = response.json()['message']['knowledge_graph']['edges'][edge_id]['object']
                object = response.json()['message']['knowledge_graph']['nodes'][object_id]['name']
                string_out = f"{subject} -> {predicate} -> {object}"
                string_out_list.append(string_out)
    string_out_dict = dict(Counter(string_out_list).items())
    pp.pprint(string_out_dict)
    print("")
    
    with open(os.path.join(write_dir,combined_node_list[i]+".txt"), 'w') as convert_file:
        convert_file.write(json.dumps(string_out_dict))
        

Pathway result: Buprenorphine_CYP2D6_Pill-rolling_tremor
{    'Buprenorphine -> biolink:affects -> CYP2D6': 2,
     'Buprenorphine -> biolink:directly_physically_interacts_with -> CYP2D6': 1,
     'CYP2D6 -> biolink:affects -> Buprenorphine': 1,
     'CYP2D6 -> biolink:genetically_associated_with -> Pill-rolling tremor': 1}

Pathway result: Buprenorphine_CYP2D6_Limb_tremor
{    'Buprenorphine -> biolink:affects -> CYP2D6': 2,
     'Buprenorphine -> biolink:directly_physically_interacts_with -> CYP2D6': 1,
     'CYP2D6 -> biolink:affects -> Buprenorphine': 1,
     'CYP2D6 -> biolink:genetically_associated_with -> Limb tremor': 1}

Pathway result: Buprenorphine(+)_OPRM1_Asterixis
{    'Buprenorphine(+) -> biolink:binds -> OPRM1': 1,
     'OPRM1 -> biolink:genetically_associated_with -> Asterixis': 1}

Pathway result: Buprenorphine_OPRM1_Asterixis
{    'Buprenorphine -> biolink:affects -> OPRM1': 5,
     'Buprenorphine -> biolink:binds -> OPRM1': 1,
     'Buprenorphine -> biolink:directly