The following functions are used to send queries to Translator and display the results.

In [29]:
#imports
import json
import requests
from collections import defaultdict
import pandas as pd
import copy
from datetime import datetime as dt
import urllib.parse
import time
from csv import reader

In [30]:
#ARS functions
def submit_to_ars(m,ars_url='https://ars.transltr.io/ars/api',arax_url='https://arax.ncats.io'):
    """POST a query to the ARS ``/submit`` endpoint and return the new message id.

    Parameters
    ----------
    m : dict
        JSON-serialisable query body, sent as the request's JSON payload.
    ars_url : str
        Base URL of the ARS API; ``/submit`` is appended to it.
    arax_url : str
        Base URL used only to print a browser link for the submitted message.

    Returns
    -------
    The value of the ``'pk'`` field of the JSON response, or ``None`` when the
    response is not JSON or carries no ``'pk'`` field.

    Network-level errors raised by ``requests.post`` are not caught here.
    """
    submit_url = f'{ars_url}/submit'
    response = requests.post(submit_url, json=m)
    try:
        message_id = response.json()['pk']
    except (ValueError, KeyError, TypeError):
        # ValueError: body is not valid JSON; KeyError/TypeError: JSON without a 'pk' entry.
        # Only these are handled so that KeyboardInterrupt and programming errors still surface.
        print('fail')
        return None
    # The viewer link is only meaningful when an id was actually issued.
    print(f'{arax_url}/?source=ARS&id={message_id}')
    return message_id

def retrieve_ars_results(mid,ars_url='https://ars.transltr.io/ars/api'):
    """Fetch the per-agent child messages of an ARS message.

    The parent message is requested with ``?trace=y`` and each entry of its
    ``'children'`` list is inspected:

    * status ``'Done'``  - the child message is fetched and kept only when its
      ``results`` list is non-empty;
    * status ``'Error'`` - the child message is fetched and always kept;
    * anything else      - the child is skipped.

    If fetching or unpacking a child fails, the exception is printed and the
    child's status is shown as ``'ARS Error'`` (the in-memory child dict is
    updated; nothing is sent back to the server).

    Parameters
    ----------
    mid : str
        Id of the parent message.
    ars_url : str
        Base URL of the ARS API.

    Returns
    -------
    dict
        ``{agent_name: {'message': <child message>}}`` for every kept child.

    A status line ``<status> <agent> <number of results>`` is printed per child.
    """
    message_url = f'{ars_url}/messages/{mid}?trace=y'
    response = requests.get(message_url)
    j = response.json()
    print( j['status'] )
    results = {}
    for child in j['children']:
        print(child['status'])
        nresults = 0
        status = child['status']
        if status in ('Done', 'Error'):
            childmessage_id = child['message']
            child_url = f'{ars_url}/messages/{childmessage_id}'
            try:
                child_response = requests.get(child_url).json()
                child_message = child_response['fields']['data']['message']
                if status == 'Done':
                    nresults = len(child_message['results'])
                    if nresults > 0:
                        results[child['actor']['agent']] = {'message': child_message}
                else:
                    # Errored children are kept regardless of content; their count stays 0.
                    results[child['actor']['agent']] = {'message': child_message}
            except Exception as e:
                # Report the cause for both branches instead of hiding it for 'Done' children.
                print(e)
                nresults = 0
                child['status'] = 'ARS Error'
        print( child['status'], child['actor']['agent'],nresults )
    return results

In [48]:
# helper functions
def translate_node_name(list_input, ontology_prefix, sort_by_ontology=False, log=False):
    '''
    Map each name to an identifier using resolve_name.

    For every name, the keys of the resolver's response are scanned in order
    and the first key containing ontology_prefix is kept; at most one
    (name, key) pair is produced per name. Names whose lookup raises
    ValueError, or that have no matching key, are left out of the output.

    ex:
        translate_node_name(['MT-ND2'], 'NCBIGene')
    get:
        [('MT-ND2', 'NCBIGene:56168')]

    With sort_by_ontology=True the pairs are ordered by the integer that
    follows the ":" in the identifier. The log argument is accepted but unused.
    '''
    def _numeric_suffix(pair):
        # "PREFIX:123" -> 123
        return int(pair[1].split(":")[1])

    matches = []
    for label in list_input:
        try:
            lookup = resolve_name(label)
        except ValueError:
            print("got json error for {}, so skip".format(label))
            continue

        # keep only the first identifier carrying the requested prefix
        first_hit = next(
            (identifier for identifier, _ in lookup.items() if ontology_prefix in identifier),
            None,
        )
        if first_hit is not None:
            matches.append((label, first_hit))

    if sort_by_ontology:
        matches = sorted(matches, key=_numeric_suffix)

    return matches

In [32]:
#utils
def printjson(j):
    """Print ``j`` serialised as JSON with a four-space indent."""
    rendered = json.dumps(j, indent=4)
    print(rendered)


def print_json(j):
    """Alternate spelling of :func:`printjson`; same output."""
    return printjson(j)

In [None]:
#Method to construct a simple one hop query.  Default values are set to the most general form
def construct_query(id0, type0=["biolink:NamedThing"],type1=["biolink:NamedThing"],
                    predicates=["biolink:related_to"], template_path='template.json'):
    """Build a one-hop query by filling in a JSON template.

    The template is expected to contain ``message.query_graph`` with nodes
    ``n0`` and ``n1`` and an edge ``e01``; those entries are overwritten.

    Parameters
    ----------
    id0 : list
        Identifiers stored as the ``ids`` of node ``n0``.
    type0, type1 : list
        Values stored as the ``categories`` of nodes ``n0`` and ``n1``.
    predicates : list
        Values stored as the ``predicates`` of edge ``e01``.
    template_path : str
        Path of the JSON template to load (defaults to ``template.json`` in
        the working directory, as before).

    Returns
    -------
    dict
        The populated query. It holds copies of the argument lists, so editing
        the returned query never alters the caller's lists or this function's
        default values.
    """
    with open(template_path, 'r') as inf:
        query = json.load(inf)
    query_graph = query["message"]["query_graph"]
    # Shallow copies: the defaults are shared list objects, and storing them directly
    # would let a later in-place edit of one query leak into every subsequent call.
    query_graph["edges"]["e01"]["predicates"] = copy.copy(predicates)
    query_graph["nodes"]["n0"]["ids"] = copy.copy(id0)
    query_graph["nodes"]["n0"]["categories"] = copy.copy(type0)
    query_graph["nodes"]["n1"]["categories"] = copy.copy(type1)
    return query

In [None]:
def resolve_name(string):
    """Look up a free-text name with the name-resolution service.

    Parameters
    ----------
    string : str
        The name to look up. It is percent-encoded before being placed in the
        query string, so spaces, ``&``, ``/`` and similar characters are safe.

    Returns
    -------
    The decoded JSON body of the response (at most 10 entries are requested,
    starting at offset 0).

    Raises whatever ``requests.post`` or ``Response.json`` raise; callers in
    this notebook catch ``ValueError`` for non-JSON replies.
    """
    # safe='' so that '/' is encoded too; the value sits inside a query parameter.
    url_string = urllib.parse.quote(string, safe='')
    name_resolver_url = "https://name-resolution-sri.renci.org/lookup?string="
    message_url = f'{name_resolver_url}{url_string}&offset=0&limit=10'
    response = requests.post(message_url)
    return response.json()

In [70]:
def construct_batch(ids0, type0=["biolink:NamedThing"],type1=["biolink:NamedThing"],
                    predicates=["biolink:related_to"]):
    """Build one query per identifier.

    Each element of ``ids0`` is wrapped in its own single-item list and handed
    to ``construct_query`` together with ``type0``, ``type1`` and
    ``predicates``. The queries are returned in the order of ``ids0``.
    """
    return [
        construct_query([single_id], type0, type1, predicates)
        for single_id in ids0
    ]

In [36]:
def run_batch(query_list,delay=0):
    """Submit every query with ``submit_to_ars`` and collect the returned ids.

    After each submission (the last one included) the function sleeps for
    ``delay`` seconds. The ids are returned in submission order; an entry is
    whatever ``submit_to_ars`` returned for that query.
    """
    def _submit_then_pause(payload):
        message_id = submit_to_ars(payload)
        time.sleep(delay)
        return message_id

    return [_submit_then_pause(payload) for payload in query_list]

In [63]:
def return_batch(id_list,delay=0):
    """Call ``retrieve_ars_results`` for every message id and collect the outputs.

    The function sleeps for ``delay`` seconds after each retrieval (the last
    one included). Outputs are returned in the order of ``id_list``.
    """
    def _retrieve_then_pause(message_id):
        fetched = retrieve_ars_results(message_id)
        time.sleep(delay)
        return fetched

    return [_retrieve_then_pause(message_id) for message_id in id_list]
        

In [38]:
def getpath_impl(j, fields, i):
    """Follow ``fields[i:]`` as successive keys into nested mappings.

    Parameters
    ----------
    j : object
        Starting value, normally a dict decoded from JSON.
    fields : sequence
        Keys to follow, outermost first.
    i : int
        Index of the first key in ``fields`` to apply.

    Returns
    -------
    The value reached after applying every remaining key, or ``None`` as soon
    as a key is missing or the current value is not a mapping (``None``, a
    number, a string, a list, ...). With no keys left to apply, ``j`` itself
    is returned.
    """
    from collections.abc import Mapping

    current = j
    for field in fields[i:]:
        # Only mappings can be descended into; anything else ends the walk with None
        # instead of raising TypeError on the membership test or the lookup.
        if not isinstance(current, Mapping) or field not in current:
            return None
        current = current[field]
    return current

def getpath(j, fields):
    """Return the value found by following ``fields`` into ``j``, or ``None`` if the path does not exist."""
    return getpath_impl(j, fields, 0)

In [39]:
def fetch_triple_from_kg(message,triple):
    """Turn a (node id, edge id, node id) triple into readable labels.

    The knowledge graph is read from ``message["message"]["knowledge_graph"]``.
    The two node ids are replaced by the ``name`` of the corresponding node and
    the edge id by the ``predicate`` of the corresponding edge. Any label that
    cannot be found is replaced by the string "Not Found".

    Returns a 3-tuple ``(first node name, edge predicate, second node name)``.
    """
    graph = getpath(message, ["message", "knowledge_graph"])
    kg_nodes = getpath(graph, ["nodes"])
    kg_edges = getpath(graph, ["edges"])

    # (container, identifier, attribute) for each position of the triple
    lookups = (
        (kg_nodes, triple[0], "name"),
        (kg_edges, triple[1], "predicate"),
        (kg_nodes, triple[2], "name"),
    )

    labels = []
    for container, identifier, attribute in lookups:
        found = getpath(container, [identifier, attribute])
        labels.append("Not Found" if found is None else found)
    return tuple(labels)
        

def trapi_to_csv(message):
    """Render each result of a one-hop answer as a ``subject,predicate,object`` line.

    Parameters
    ----------
    message : dict
        Answer whose results live under ``message["message"]["results"]`` and
        whose labels are resolved by ``fetch_triple_from_kg``.

    Returns
    -------
    list of str, or None
        One CSV-formatted line (no trailing newline) per usable result, or
        ``None`` when the message has no ``results`` entry. Fields containing a
        comma, quote or newline are quoted so every line still parses as three
        columns. Results without at least two bound nodes and one bound edge
        are skipped.

    Only the first binding of each query-graph key is used, and the first two
    node bindings (in the order they appear in the result) are taken as the
    two ends of the edge; this assumes a one-hop query.
    """
    # Imported locally under an alias: notebook cells use `csv` as a loop variable,
    # which would otherwise shadow a module-level `csv` import.
    import csv as _csv
    import io

    results = getpath(message, ["message", "results"])
    if results is None:
        return None

    csv_list = []
    for result in results:
        node_bindings = getpath(result, ["node_bindings"]) or {}
        edge_bindings = getpath(result, ["edge_bindings"]) or {}
        # First binding per key; keys with an empty binding list are ignored.
        node_ids = [bound[0]["id"] for bound in node_bindings.values() if bound]
        edge_ids = [bound[0]["id"] for bound in edge_bindings.values() if bound]
        if len(node_ids) < 2 or not edge_ids:
            # Not a complete one-hop triple; nothing sensible to write.
            continue
        # here we enter the realm of assumptions, namely that we are only working with one-hop queries
        labels = fetch_triple_from_kg(message, (node_ids[0], edge_ids[0], node_ids[1]))
        line = io.StringIO()
        _csv.writer(line, lineterminator="").writerow(labels)
        csv_list.append(line.getvalue())
    return csv_list

        

In [11]:
 # Smoke test: run trapi_to_csv on a saved answer file. The returned list is
 # neither stored nor displayed, so this only checks that the call completes.
 with open('exampleAnswer.json','r') as inf:
        trapi_to_csv(json.load(inf))

In [None]:
#my_s = "pancreatic cancer"
#resolved=resolve_name(my_s)
#printjson(resolved)

In [None]:
# Example: build (but do not submit) a Gene -> ChemicalEntity one-hop query.
# Keyword arguments are used so the categories cannot slide into the wrong
# positional slot; construct_query takes (id0, type0, type1, predicates), so
# the former extra [] argument pushed "biolink:ChemicalEntity" into predicates.
my_input ="NCBIGene:1956"
my_query = construct_query([my_input], type0=["biolink:Gene"], type1=["biolink:ChemicalEntity"])
print(my_query)

In [None]:
# Build and pretty-print a Gene -> ChemicalEntity one-hop query for NCBIGene:23221.
my_query = construct_query(["NCBIGene:23221"],["biolink:Gene"],["biolink:ChemicalEntity"])
printjson(my_query)
# Submission is disabled here; a hard-coded message id is looked up instead.
#my_id=submit_to_ars(my_query)
#time.sleep(300)
# NOTE(review): presumably the id returned by an earlier submission of this query - confirm.
my_id="1a1aa294-a854-4857-a58c-ef5dfb273fa9"
# my_json maps each agent name to {'message': ...}, as built by retrieve_ars_results.
my_json=retrieve_ars_results(my_id)

In [None]:
# Print the CSV lines returned by each agent, at most LIMIT lines per agent.
LIMIT = 5000

for key in my_json.keys():
    print("Currently viewing results from "+key+"\n")
    csvs=trapi_to_csv(my_json[key])
    if csvs is None:
        print("None CSVs")
        continue
    # Slicing caps the output at exactly LIMIT lines (a post-increment check lets one extra through).
    # The loop variable is not called `csv`, to avoid rebinding that name at notebook level.
    for csv_line in csvs[:LIMIT]:
        print(csv_line)

In [86]:
def write_csvs(results_map,LIMIT=5000):
    """Write one ``<symbol>-<agent>.csv`` file per (symbol, agent) pair.

    Parameters
    ----------
    results_map : dict
        ``{symbol: {agent: message}}``; each message is converted with
        ``trapi_to_csv``.
    LIMIT : int
        Maximum number of lines written to each file.

    Files are created in the current working directory with mode ``"x"``, so
    an existing file is never overwritten and ``FileExistsError`` is raised
    instead. No file is created for a pair whose conversion yields ``None``;
    a notice is printed for it.
    """
    for symbol, per_agent in results_map.items():
        for key, message in per_agent.items():
            # Convert first so that nothing is opened (and left open/empty) when there are no results.
            csvs = trapi_to_csv(message)
            if csvs is None:
                print("None results found for "+symbol+" "+key)
                continue
            with open(symbol+"-"+key+".csv", "x") as f:
                # Slice rather than count-and-break: writes exactly LIMIT lines at most.
                for line in csvs[:LIMIT]:
                    f.write(line+"\n")

In [56]:
def do_the_thing(input_path='cluster_important_genes.csv'):
    """Read gene symbols from a CSV file and translate them to NCBIGene identifiers.

    Parameters
    ----------
    input_path : str
        CSV file whose first row is a header and whose second column holds the
        gene symbols (defaults to the file this notebook was written for).

    Returns
    -------
    list of str
        The identifier part of each pair returned by
        ``translate_node_name(symbols, "NCBIGene")``, in the order returned.
        An empty file gives an empty list.

    Rows with fewer than two columns (for example blank lines) are ignored.
    """
    gene_symbols = []
    # newline='' is what the csv module asks for when handing it a file object.
    with open(input_path, 'r', newline='') as inf:
        csv_reader = reader(inf)
        # Default of None turns an empty file into "no rows" instead of StopIteration.
        header = next(csv_reader, None)
        if header is not None:
            # Iterate over each row after the header in the csv
            for row in csv_reader:
                if len(row) > 1:
                    gene_symbols.append(row[1])
    curie_tuples = translate_node_name(gene_symbols, "NCBIGene")
    return [pair[1] for pair in curie_tuples]
        

In [57]:
# Identifiers for the gene symbols listed in cluster_important_genes.csv (see do_the_thing).
curies = do_the_thing()

In [72]:
# Build one Gene -> ChemicalEntity query per identifier, then submit them all,
# sleeping 30 seconds after each submission (run_batch's delay argument).
query_list = construct_batch(curies,["biolink:Gene"],["biolink:ChemicalEntity"])
#for q in query_list:
#    printjson(q)
mid_list=run_batch(query_list,30)

https://arax.ncats.io/?source=ARS&id=514f6176-e17b-49fc-85bf-69a807bbaf96
https://arax.ncats.io/?source=ARS&id=13e10f9e-b1f6-4a24-8af3-5742b1274bd1
https://arax.ncats.io/?source=ARS&id=504307e5-b051-4c5f-9383-0b92bd3af186
https://arax.ncats.io/?source=ARS&id=3d7416f8-fcd2-47b1-ae11-def40b6c12b8
https://arax.ncats.io/?source=ARS&id=d0644f48-c3c0-4e53-a0ba-c24f3a7b3823
https://arax.ncats.io/?source=ARS&id=5e8771b8-c43f-4026-838a-ba4a9e92c837
https://arax.ncats.io/?source=ARS&id=d33bf1be-70fe-497d-9d12-702a38bbb309
https://arax.ncats.io/?source=ARS&id=4f3e3453-84c2-436a-ab26-6d29729501b7
https://arax.ncats.io/?source=ARS&id=985cf80a-517c-46f8-87b8-62113d1f236a
https://arax.ncats.io/?source=ARS&id=d6af2477-74bf-478b-8170-0e28561bc6d8
https://arax.ncats.io/?source=ARS&id=cd257ccc-6c61-4ec4-a803-fb08e539ecce
https://arax.ncats.io/?source=ARS&id=8f926262-4ab3-445c-872a-c229a1ce3b26
https://arax.ncats.io/?source=ARS&id=0bea769c-1c04-4889-9185-dec50831ddb9
https://arax.ncats.io/?source=ARS&id=8

In [74]:
# Retrieve the results for every submitted message id, sleeping 15 seconds after each retrieval.
result_list = return_batch(mid_list,15)

Done
Done
Done ara-aragorn 0
Done
Done ara-arax 1
Done
Done ara-bte 0
Done
Done ara-unsecret 0
Done
Done kp-genetics 0
Done
Done kp-molecular 0
Done
Done ara-explanatory 1
Done
Done ara-improving 0
Done
Done kp-cam 0
Done
Done kp-textmining 0
Done
Done kp-openpredict 0
Done
Done kp-icees 0
Done
Done kp-chp 0
Done
ARS Error kp-cohd 0
Done
Done kp-icees-dili 0
Done
Done
Done ara-aragorn 0
Done
Done ara-bte 0
Done
Done ara-arax 1
Done
Done ara-unsecret 0
Done
Done kp-genetics 0
Done
Done kp-molecular 0
Done
Done ara-explanatory 1
Done
Done ara-improving 0
Done
Done kp-cam 0
Done
Done kp-textmining 0
Done
Done kp-openpredict 0
Done
Done kp-icees 0
Done
Done kp-chp 0
Done
ARS Error kp-cohd 0
Done
Done kp-icees-dili 0
Done
Done
Done ara-aragorn 344
Done
Done ara-arax 427
Done
Done ara-bte 0
Done
Done ara-unsecret 33
Done
Done kp-genetics 0
Done
Done kp-molecular 26
Done
Done ara-explanatory 450
Done
Done ara-improving 34
Done
Done kp-cam 0
Done
Done kp-textmining 28
Done
Done kp-openpredict 

In [75]:
# Re-read the input file to key each entry of result_list by its gene symbol
# (second CSV column), pairing them purely by position.
# Shouldn't have to do it this way, but we'll go back and fix it later.
# NOTE(review): this assumes result_list has exactly one entry per data row, in
# row order. translate_node_name leaves out names that fail to resolve or have
# no matching identifier, so the two sequences may not line up - confirm.
INPUT = 'cluster_important_genes.csv'
result_map={}
gene_list=[]  # not used in this cell
with open(INPUT,'r') as inf:
    counter=0
    csv_reader = reader(inf)
    header = next(csv_reader)
    if header != None:
    # Iterate over each row after the header in the csv
        for row in csv_reader:
            result_map[row[1]]=result_list[counter]
            counter+=1
            
            

In [87]:
# Write one "<symbol>-<agent>.csv" file per gene symbol and agent into the working directory.
write_csvs(result_map)

None results found for GPX1 ara-unsecret
None results found for KLF4 ara-unsecret
None results found for LUC7L3 ara-unsecret
None results found for NDUFB4 ara-unsecret
