The following functions are used to send queries to Translator and display the results.

In [2]:
#imports
import json
import requests
from collections import defaultdict
import pandas as pd
import copy
from datetime import datetime as dt
import urllib.parse
import time

In [3]:
#ARS functions
def submit_to_ars(m,ars_url='https://ars.transltr.io/ars/api',arax_url='https://arax.ncats.io'):
    """Submit a query to the ARS and return the id of the created message.

    Parameters
    ----------
    m : JSON-serialisable object
        The query; it is sent as the JSON body of the POST request.
    ars_url : str
        Base URL of the ARS API; the request goes to ``{ars_url}/submit``.
    arax_url : str
        Base URL used to build the link that is printed after a successful
        submission.

    Returns
    -------
    The value of the ``pk`` field of the JSON reply, or ``None`` when the
    reply is not JSON or carries no ``pk`` (in which case ``fail`` is printed).
    """
    submit_url = f'{ars_url}/submit'
    response = requests.post(submit_url, json=m)
    try:
        message_id = response.json()['pk']
    except (ValueError, KeyError, TypeError):
        # ValueError: body is not JSON; KeyError/TypeError: JSON without 'pk'.
        # A bare ``except`` would also have swallowed KeyboardInterrupt.
        print('fail')
        return None
    # Only print the link when there is a real id to put in it.
    print(f'{arax_url}/?source=ARS&id={message_id}')
    return message_id

def _fetch_child_message(child, ars_url):
    """Download one child record and return its ``fields.data.message`` payload."""
    child_url = f"{ars_url}/messages/{child['message']}"
    child_response = requests.get(child_url).json()
    return child_response['fields']['data']['message']


def retrieve_ars_results(mid,ars_url='https://ars.transltr.io/ars/api'):
    """Fetch the per-agent messages recorded under an ARS message id.

    The parent record is read from ``{ars_url}/messages/{mid}?trace=y``; its
    ``status`` is printed, then each entry of ``children`` is processed:

    * status ``Done``  - the child message is downloaded and kept only when
      it contains at least one result;
    * status ``Error`` - the child message is downloaded and always kept;
    * anything else    - nothing is downloaded.

    If downloading or unpacking a child fails, the exception is printed and
    the child is reported with status ``ARS Error``.  For every child a
    summary line ``<status> <agent> <number of results>`` is printed.

    Parameters
    ----------
    mid : message id; it is interpolated into the request URL.
    ars_url : str
        Base URL of the ARS API.

    Returns
    -------
    dict mapping ``child['actor']['agent']`` to ``{'message': <child message>}``.
    """
    message_url = f'{ars_url}/messages/{mid}?trace=y'
    j = requests.get(message_url).json()
    print( j['status'] )
    results = {}
    for child in j['children']:
        status = child['status']
        print(status)
        nresults = 0
        if status in ('Done', 'Error'):
            try:
                message = _fetch_child_message(child, ars_url)
                if status == 'Done':
                    nresults = len(message['results'])
                # Errored children are kept unconditionally so their content
                # can be inspected; finished ones only when non-empty.
                if status == 'Error' or nresults > 0:
                    results[child['actor']['agent']] = {'message': message}
            except Exception as e:
                # Report the failure for both branches instead of hiding it.
                print(e)
                nresults = 0
                child['status'] = 'ARS Error'
        print( child['status'], child['actor']['agent'], nresults )
    return results

In [None]:
# helper functions
def translate(list_input, ontology_prefix, sort_by_ontology=False, log=False):
    """Look up identifiers for a list of names through the name resolver.

    Each name is substituted into the module-level format string
    ``url_name_resolver`` (expected to be defined elsewhere; it is not part
    of the cells shown here) and the resulting URL is POSTed.  Every key of
    the JSON reply that contains ``ontology_prefix`` yields one
    ``(name, key)`` pair, so a name may contribute several pairs.  Names
    whose request raises ``ValueError`` (e.g. a non-JSON reply) are reported
    and skipped.

    Parameters
    ----------
    list_input : iterable of names to resolve.
    ontology_prefix : str
        Substring a key must contain to be kept, e.g. ``'NCBIGene'``.
    sort_by_ontology : bool
        When true, sort the pairs by the integer after the first ``:`` of
        the identifier.
    log : currently unused.

    Returns
    -------
    list of ``(name, identifier)`` tuples, e.g.
    ``[('MT-ND2', 'NCBIGene:56168'), ('MT-ND2', 'NCBIGene:387315')]``.
    """
    def _numeric_suffix(pair):
        # 'NCBIGene:56168' -> 56168
        return int(pair[1].split(":")[1])

    matches = []
    for name in list_input:
        lookup_url = url_name_resolver.format(name)
        try:
            payload = requests.post(lookup_url).json()
        except ValueError:
            print("got json error for {}, so skip".format(name))
            continue

        # keep only identifiers from the requested ontology
        matches.extend(
            (name, identifier)
            for identifier, _ in payload.items()
            if ontology_prefix in identifier
        )

    if sort_by_ontology:
        matches.sort(key=_numeric_suffix)

    return matches

In [4]:
#utils
def printjson(j):
    """Print ``j`` as JSON text indented by four spaces."""
    rendered = json.dumps(j, indent=4)
    print(rendered)


def print_json(j):
    """Alternative spelling of :func:`printjson`; behaves identically."""
    return printjson(j)

In [5]:
#Method to construct a simple one hop query.  Default values are set to the most general form
def construct_query(id0, id1=[], type0=["biolink:NamedThing"],type1=["biolink:NamedThing"],
                    predicates=["biolink:related_to"], template_path='template.json'):
    """Build a one-hop query by filling in a JSON template.

    The template is read from ``template_path`` and must already contain
    ``message.query_graph.edges.e01`` and ``message.query_graph.nodes.n0`` /
    ``n1``; the corresponding ``predicates``, ``ids`` and ``categories``
    entries are overwritten.

    Parameters
    ----------
    id0 : identifiers stored as ``ids`` of node ``n0``.
    id1 : identifiers stored as ``ids`` of node ``n1`` (default: none).
    type0, type1 : categories stored on ``n0`` and ``n1``.
    predicates : predicates stored on edge ``e01``.
    template_path : str
        Location of the template file; defaults to ``template.json`` in the
        current working directory.

    Returns
    -------
    dict : the filled-in query.
    """
    with open(template_path,'r') as inf:
        query = json.load(inf)

    query_graph = query["message"]["query_graph"]
    # Store shallow copies: the defaults above are shared list objects, so
    # handing them out directly would let a caller who edits one query
    # silently change the defaults (and every later query).
    query_graph["edges"]["e01"]["predicates"] = copy.copy(predicates)
    query_graph["nodes"]["n0"]["ids"] = copy.copy(id0)
    query_graph["nodes"]["n1"]["ids"] = copy.copy(id1)
    query_graph["nodes"]["n0"]["categories"] = copy.copy(type0)
    query_graph["nodes"]["n1"]["categories"] = copy.copy(type1)
    return query

In [6]:
def resolve_name(string):
    """Query the name-resolution service for ``string``.

    The text is percent-encoded before being placed in the ``string`` query
    parameter, so characters such as ``&``, ``#`` or ``+`` in a name cannot
    corrupt the request.  The lookup asks for the first 10 matches
    (``offset=0&limit=10``).

    Parameters
    ----------
    string : str
        Free-text name to look up.

    Returns
    -------
    The decoded JSON body of the service's reply.
    """
    url_string = urllib.parse.quote(string)
    name_resolver_url = "https://name-resolution-sri.renci.org/lookup?string="
    # Use the encoded form; the raw text was interpolated here previously.
    message_url = f'{name_resolver_url}{url_string}&offset=0&limit=10'
    response = requests.post(message_url)
    return response.json()

In [None]:
def construct_batch(ids0, id1=[], type0=["biolink:NamedThing"],type1=["biolink:NamedThing"],
                    predicates=["biolink:related_to"]):
    """Build one query per entry of ``ids0``.

    Every entry is handed to ``construct_query`` unchanged as its ``id0``
    argument; ``id1``, ``type0``, ``type1`` and ``predicates`` are shared by
    all queries.

    Returns
    -------
    list of queries, in the order of ``ids0``.
    """
    return [
        construct_query(single_id, id1, type0, type1, predicates)
        for single_id in ids0
    ]

In [None]:
def run_batch(query_list,delay=0):
    """Submit every query in ``query_list`` to the ARS.

    Parameters
    ----------
    query_list : iterable of queries, each passed to ``submit_to_ars``.
    delay : number
        Seconds to sleep after each submission (default 0).

    Returns
    -------
    list of the values returned by ``submit_to_ars``, in submission order.
    """
    id_list = []
    for query in query_list:
        id_list.append(submit_to_ars(query))
        time.sleep(delay)
    return id_list

In [None]:
def return_batch(id_list, delay=0):
    """Retrieve the ARS results for every message id in ``id_list``.

    Parameters
    ----------
    id_list : iterable of message ids, each passed to ``retrieve_ars_results``.
    delay : number
        Seconds to sleep after each retrieval (default 0).  The function
        previously read an undefined ``delay`` name and failed on the first
        iteration.

    Returns
    -------
    list of the values returned by ``retrieve_ars_results``, in input order.
    """
    result_list = []
    for my_id in id_list:
        result_list.append(retrieve_ars_results(my_id))
        time.sleep(delay)
    return result_list
        

In [9]:
def getpath_impl(j, fields, i):
    """Walk ``j`` along ``fields[i:]`` and return what is found there.

    At each step the next field is looked up with ``in``; when it is absent
    the walk yields ``None``.  The walk also stops as soon as the current
    value is ``None``.
    """
    current = j
    position = i
    while current is not None and position < len(fields):
        key = fields[position]
        current = current[key] if key in current else None
        position += 1
    return current


def getpath(j, fields):
    """Return the value reached by following ``fields`` from ``j``, or ``None``."""
    return getpath_impl(j, fields, 0)

In [79]:
def fetch_triple_from_kg(message,triple):
    """Translate a triple of knowledge-graph ids into readable labels.

    ``triple`` is indexed as ``(node id, edge id, node id)``.  The ids are
    looked up under ``fields.data.message.knowledge_graph`` of ``message``:
    the ``name`` of each node and the ``predicate`` of the edge.

    Returns
    -------
    tuple ``(first node name, edge predicate, second node name)``; each
    component is whatever ``getpath`` returns for that lookup.
    """
    knowledge_graph = getpath(message, ["fields", "data", "message", "knowledge_graph"])
    kg_nodes = getpath(knowledge_graph, ["nodes"])
    kg_edges = getpath(knowledge_graph, ["edges"])
    return (
        getpath(kg_nodes, [triple[0], "name"]),
        getpath(kg_edges, [triple[1], "predicate"]),
        getpath(kg_nodes, [triple[2], "name"]),
    )
        

def trapi_to_csv(message):
    """Print every result of a one-hop answer as ids followed by a CSV line.

    For each entry of ``fields.data.message.results`` the first bound id of
    every node binding and of every edge binding is printed, then one CSV
    line ``<first node name>,<edge predicate>,<second node name>`` obtained
    through ``fetch_triple_from_kg``.

    Only one-hop results are supported: the first two node ids and the first
    edge id are used.  Results that do not provide them are reported and
    skipped.  Missing names/predicates become empty CSV fields, and fields
    containing commas or quotes are quoted.

    Parameters
    ----------
    message : dict
        Record whose answer lives under ``fields.data.message``.

    Returns
    -------
    None; all output is printed.
    """
    # Local imports: needed only for CSV quoting of the printed line.
    import csv
    import io

    results = getpath(message,["fields","data","message","results"])
    if not results:
        # No results (or the path is missing): nothing to print.
        return

    for result in results:
        node_bindings = getpath(result,["node_bindings"]) or {}
        edge_bindings = getpath(result,["edge_bindings"]) or {}
        node_ids = []
        edge_ids = []
        for bound in node_bindings.values():
            if not bound:
                continue  # binding present but empty
            node_id = bound[0]["id"]
            node_ids.append(node_id)
            print(node_id)
        for bound in edge_bindings.values():
            if not bound:
                continue
            edge_id = bound[0]["id"]
            edge_ids.append(edge_id)
            print(edge_id)

        # Assumption: one-hop queries only, i.e. two nodes joined by one edge.
        if len(node_ids) < 2 or not edge_ids:
            print("skipping result without a complete one-hop binding")
            continue
        my_tuple = (node_ids[0], edge_ids[0], node_ids[1])
        f_t = fetch_triple_from_kg(message, my_tuple)

        # csv.writer quotes fields that contain commas and writes None as ''.
        line_buffer = io.StringIO()
        csv.writer(line_buffer, lineterminator="").writerow(f_t)
        print(line_buffer.getvalue())
        

        

In [80]:
 # Demo: load the saved answer in exampleAnswer.json and print its results with trapi_to_csv.
 with open('exampleAnswer.json','r') as inf:
        trapi_to_csv(json.load(inf))

UMLS:C0028944
NCBIGene:5354
c052d6d04324262b39e029ae01e6b2ed
Oligodendroglia,biolink:location_of,PLP1


In [7]:
# Demo: look up a free-text name with the name resolver and pretty-print the JSON reply.
my_s = "pancreatic cancer"
resolved=resolve_name(my_s)
printjson(resolved)

{
    "MONDO:0021040": [
        "Pancreatic Cancer",
        "pancreatic cancer",
        "Pancreatic cancer",
        "Pancreatic cancer",
        "pancreatic cancers",
        "Pancreatic Cancers",
        "Cancer, Pancreatic",
        "Cancers, Pancreatic",
        "CA - Pancreatic cancer",
        "tumor pancreatic cancer",
        "pancreatic tumor or cancer",
        "increased risk of pancreatic cancer",
        "Increased risk of pancreatic cancer",
        "increased risk of pancreatic cancer",
        "pancreas",
        "pancreas ca",
        "Pancreas Tumor",
        "tumor pancreas",
        "PANCREAS TUMOR",
        "PANCREAS NEOPL",
        "pancreas tumor",
        "Pancreas Cancer",
        "Pancreas cancer",
        "pancreas tumors",
        "PANCREAS CANCER",
        "tumour pancreas",
        "pancreas tumour",
        "pancreas cancer",
        "Pancreas Cancers",
        "Pancreatic tumor",
        "Cancer, Pancreas",
        "Pancreas--Tumors",
        "pancrea

In [8]:
# Demo: build a query for a single n0 id, leaving every other argument at its default,
# and pretty-print it. construct_query reads template.json from the working directory.
my_query = construct_query(["NCBIGene:23221"])
printjson(my_query)

{
    "message": {
        "query_graph": {
            "edges": {
                "e01": {
                    "object": "n0",
                    "subject": "n1",
                    "predicates": [
                        "biolink:related_to"
                    ]
                }
            },
            "nodes": {
                "n0": {
                    "ids": [
                        "NCBIGene:23221"
                    ],
                    "categories": [
                        "biolink:NamedThing"
                    ]
                },
                "n1": {
                    "categories": [
                        "biolink:NamedThing"
                    ],
                    "ids": []
                }
            }
        }
    }
}


In [6]:
# Load a previously saved query from A.0_RHOBTB2_direct.json into query_0 and pretty-print it.
with open('A.0_RHOBTB2_direct.json','r') as inf:
    query_0 = json.load(inf)
printjson(query_0)

{
    "message": {
        "query_graph": {
            "edges": {
                "e01": {
                    "object": "n0",
                    "subject": "n1",
                    "predicates": [
                        "biolink:entity_negatively_regulates_entity",
                        "biolink:decreases_abundance_of",
                        "biolink:decreases_expression_of",
                        "biolink:decreases_stability_of",
                        "biolink:decreases_uptake_of",
                        "biolink:increases_degradation_of",
                        "biolink:decreases_synthesis_of",
                        "biolink:decreases_activity_of"
                    ]
                }
            },
            "nodes": {
                "n0": {
                    "ids": [
                        "NCBIGene:23221"
                    ],
                    "categories": [
                        "biolink:Gene"
                    ]
                },
              

In [None]:
# Submit query_0 to the ARS and keep the returned message id (None if the submission failed).
pk_0 = submit_to_ars(query_0)