The following functions are used to send queries to Translator and display the results.

In [1]:
#imports
import json
import requests
from collections import defaultdict
import pandas as pd
import copy
from datetime import datetime as dt
import urllib.parse
import time
from csv import reader
import os

ModuleNotFoundError: No module named 'pandas'

In [2]:
#ARS functions
def submit_to_ars(m,ars_url='https://ars.transltr.io/ars/api',arax_url='https://arax.ncats.io'):
    '''
    Submit a query to the ARS and return the id of the created message.

    The query `m` is POSTed as JSON to `{ars_url}/submit` and the id is read
    from the 'pk' field of the JSON reply. On success a link of the form
    `{arax_url}/?source=ARS&id=<id>` is printed.

    Parameters:
        m: JSON-serialisable query body.
        ars_url: base URL of the ARS API.
        arax_url: base URL used only to build the printed link.

    Returns:
        The value of 'pk' from the reply, or None when the reply could not be
        decoded or carried no 'pk'.
    '''
    response = requests.post(f'{ars_url}/submit', json=m)
    try:
        message_id = response.json()['pk']
    except Exception as e:
        # `Exception` rather than a bare except, so Ctrl-C / SystemExit still
        # propagate. No link is printed because there is no id to link to.
        print(f'fail: could not read a message id from the ARS reply ({e!r})')
        return None
    print(f'{arax_url}/?source=ARS&id={message_id}')
    return message_id

def retrieve_ars_results(mid,ars_url='https://ars.transltr.io/ars/api'):
    '''
    Collect the per-agent messages for an ARS query.

    Fetches `{ars_url}/messages/{mid}?trace=y`, prints the parent's status, and
    then fetches the message of every child whose status is 'Done' or 'Error'.

    Parameters:
        mid: id of the parent ARS message.
        ars_url: base URL of the ARS API.

    Returns:
        dict mapping child['actor']['agent'] to {'message': <child message>}.
        A 'Done' child is included only when its message has at least one
        result; an 'Error' child is included whenever its message could be
        read. Children in any other state are skipped.

    A child whose message cannot be fetched or parsed is reported on stdout
    and left out of the result instead of aborting the whole retrieval.
    '''
    parent = requests.get(f'{ars_url}/messages/{mid}?trace=y').json()
    print( parent['status'] )
    results = {}
    for child in parent['children']:
        status = child['status']
        if status not in ('Done', 'Error'):
            continue
        child_url = f"{ars_url}/messages/{child['message']}"
        try:
            message = requests.get(child_url).json()['fields']['data']['message']
            # A finished child with an empty result list has nothing to report.
            if status == 'Done' and len(message['results']) == 0:
                continue
            results[child['actor']['agent']] = {'message': message}
        except Exception as e:
            # Both branches now report failures; previously 'Done' children
            # that failed to load were dropped without any trace.
            print(f'ARS Error while reading {child_url}: {e!r}')
    return results

In [3]:
# helper functions
def translate_node_name(list_input, ontology_prefix, sort_by_ontology=False, log=False):
    '''
    Map names to identifiers using the Translator name resolver.

    For every name in list_input, resolve_name(name) is called and the first
    returned key containing ontology_prefix is kept, so each name contributes
    at most one (name, identifier) tuple. Names whose lookup raises ValueError
    are reported and skipped; names without a matching key are left out.
    The resolver reply is expected to be a dict keyed by identifier.

    Parameters:
        list_input: iterable of names to resolve.
        ontology_prefix: text an identifier must contain, e.g. 'NCBIGene'.
        sort_by_ontology: when True, sort the result by the identifier part
            after the first ":". Numeric ids come first in numeric order;
            non-numeric ids (e.g. 'UniProtKB:P04040') follow alphabetically.
        log: accepted for backward compatibility; currently unused.

    ex:
        list_test_result = translate_node_name(list_test, 'NCBIGene', sort_by_ontology=True)
    get (illustrative shape):
        [('MT-ND2', 'NCBIGene:56168')]
    '''
    def _id_sort_key(pair):
        # Same field the original sort used (second ":"-separated part), but
        # tolerant of ids that are not plain integers.
        parts = pair[1].split(":")
        local_id = parts[1] if len(parts) > 1 else ""
        try:
            return (0, int(local_id), "")
        except ValueError:
            return (1, 0, local_id)

    list_result = []

    for name in list_input:
        try:
            output_json = resolve_name(name)
        except ValueError:
            print("got json error for {}, so skip".format(name))
            continue

        # Keep only the first identifier that mentions the requested prefix.
        first_match = next((key for key in output_json if ontology_prefix in key), None)
        if first_match is not None:
            list_result.append((name, first_match))

    if sort_by_ontology:
        list_result.sort(key=_id_sort_key)

    return list_result

In [4]:
#utils
def printjson(j):
    '''Print `j` serialised as JSON with a four-space indent.'''
    rendered = json.dumps(j, indent=4)
    print(rendered)

def print_json(j):
    '''Snake-case spelling of printjson; forwards `j` unchanged.'''
    printjson(j)

In [5]:
def resolve_name(string):
    '''
    Look up `string` in the name-resolution service and return the decoded JSON reply.

    The name is percent-encoded before being placed in the query string so
    that characters such as spaces, '&' or '#' stay part of the searched name
    instead of altering the URL. The request asks for offset 0 and limit 10.

    Parameters:
        string: the name to look up.

    Returns:
        Whatever `response.json()` yields for the lookup request.
    '''
    name_resolver_url="https://name-resolution-sri.renci.org/lookup?string="
    url_string = urllib.parse.quote(string)
    # Use the encoded form; the raw name was previously interpolated here.
    message_url = f'{name_resolver_url}{url_string}&offset=0&limit=10'
    response = requests.post(message_url)
    return response.json()

In [47]:
def getpath_impl(j, fields, i):
    '''
    Walk `j` along fields[i:], one key at a time.

    Stops and yields None as soon as a key is absent from the current value
    (or the current value is already None); otherwise yields whatever is
    reached once the keys run out.
    '''
    current = j
    position = i
    while current is not None and position < len(fields):
        key = fields[position]
        current = current[key] if key in current else None
        position += 1
    return current

def getpath(j, fields):
    '''Return the value reached by following every key in `fields` from `j`, or None.'''
    return getpath_impl(j, fields, 0)

In [48]:
def get_message_from_disk(file="araxResultsSpringHackathon.json"):
    '''
    Load a JSON document from disk and return the parsed object.

    Parameters:
        file: path of the JSON file to read.

    Returns:
        The object produced by json.load for that file.
    '''
    # Read as UTF-8 explicitly so the result does not depend on the platform's
    # default text encoding.
    with open(file, 'r', encoding='utf-8') as f:
        data = json.load(f)
    return data

In [49]:
def get_knowledge_graph(message):
    '''Return message["fields"]["data"]["message"]["knowledge_graph"], or None if any step is missing.'''
    kg_path = ["fields", "data", "message", "knowledge_graph"]
    return getpath(message, kg_path)

In [50]:
#Returns `nodes` from either a TRAPI knowledge_graph or full response
def get_nodes(json):
    '''
    Return the `nodes` of a knowledge graph.

    Accepts either a full response (recognised by a top-level "model" key) or
    a bare knowledge graph (recognised by a top-level "nodes" key); anything
    else gives None. Note the parameter name hides the `json` module inside
    this function.
    '''
    if "model" in json:
        # Full response: the graph sits under fields/data/message.
        return getpath(json, ["fields", "data", "message", "knowledge_graph", "nodes"])
    if "nodes" in json:
        # Bare knowledge graph.
        return getpath(json, ["nodes"])
    return None
        

In [95]:
#Returns `edges` from either a TRAPI knowledge_graph or full response
def get_edges(json):
    '''
    Return the `edges` of a knowledge graph.

    Accepts either a full response (recognised by a top-level "model" key) or
    a bare knowledge graph (recognised by a top-level "edges" key); anything
    else gives None. Note the parameter name hides the `json` module inside
    this function.
    '''
    if "model" in json:
        # Full response: the graph sits under fields/data/message.
        return getpath(json, ["fields", "data", "message", "knowledge_graph", "edges"])
    if "edges" in json:
        # Bare knowledge graph.
        return getpath(json, ["edges"])
    return None

In [97]:
def get_results(json):
    '''Return json["fields"]["data"]["message"]["results"], or None if any step is missing.'''
    results_path = ["fields", "data", "message", "results"]
    return getpath(json, results_path)

In [118]:
def edge_contains_semmed(edge):
    '''
    Tell whether an edge names SemMedDB as one of its knowledge sources.

    Only attributes whose "attribute_type_id" is
    "biolink:aggregator_knowledge_source" or "biolink:knowledge_source" are
    inspected. The attribute "value" may be a single string or a collection of
    strings; the edge matches when any of them contains "semmeddb".

    Parameters:
        edge: one knowledge-graph edge (a dict with an optional "attributes" list).

    Returns:
        True if a source attribute mentions "semmeddb", otherwise False.
        Edges with missing or empty attributes give False.
    '''
    source_types = ("biolink:aggregator_knowledge_source", "biolink:knowledge_source")
    attributes = getpath(edge, ["attributes"])
    if not attributes:
        # No attributes at all: nothing can cite SemMedDB.
        return False
    for attribute in attributes:
        if getpath(attribute, ["attribute_type_id"]) not in source_types:
            continue
        value = getpath(attribute, ["value"])
        # Normalise to a collection so list-valued sources are searched
        # element by element rather than by exact list membership.
        candidates = value if isinstance(value, (list, tuple, set)) else [value]
        if any(isinstance(item, str) and "semmeddb" in item for item in candidates):
            return True
    return False
        

In [87]:
# Load the saved response from disk (get_message_from_disk's default file)
# and pull out its knowledge graph.
example = get_message_from_disk()
kg = get_knowledge_graph(example)

In [100]:
# Extract the knowledge-graph edges and nodes, and the results, from the loaded
# response; each is None when the corresponding path is absent.
edges = get_edges(example)
nodes = get_nodes(example)
results = get_results(example)

In [119]:
# Count how many knowledge-graph edges cite SemMedDB as a knowledge source.
# get_edges returns None when no edges were found, so fall back to an empty dict.
edge_map = edges or {}
total_edges = len(edge_map)
semmed_count = sum(1 for edge in edge_map.values() if edge_contains_semmed(edge))
# Both counts are ints, so format them instead of concatenating with "+";
# the total lives in `total_edges`.
print(f"{semmed_count} of {total_edges}")

infores:rtx-kg2:CHEMBL.COMPOUND:CHEMBL472-biolink:treats-MONDO:0002909
infores:rtx-kg2:CHEMBL.COMPOUND:CHEMBL1201866-biolink:treats-MONDO:0004946
infores:rtx-kg2:DRUGBANK:DB01404-biolink:prevents-MONDO:0002909
infores:rtx-kg2:CHEMBL.COMPOUND:CHEMBL1908360-biolink:treats-MONDO:0002909
infores:rtx-kg2:CHEBI:81572-biolink:prevents-MONDO:0002909
infores:rtx-kg2:CHEMBL.COMPOUND:CHEMBL245067-biolink:treats-MONDO:0002909
infores:rtx-kg2:CHEMBL.COMPOUND:CHEMBL1201866-biolink:treats-MONDO:0002909
infores:rtx-kg2:CHEMBL.COMPOUND:CHEMBL408403-biolink:affects-MONDO:0002909
infores:rtx-kg2:CHEMBL.COMPOUND:CHEMBL1200559-biolink:treats-MONDO:0002909
infores:rtx-kg2:CHEMBL.COMPOUND:CHEMBL245067-biolink:disrupts-MONDO:0002909
infores:rtx-kg2:CHEMBL.COMPOUND:CHEMBL2095209-biolink:disrupts-MONDO:0002909
infores:rtx-kg2:CHEMBL.COMPOUND:CHEMBL1237-biolink:prevents-MONDO:0002909
infores:rtx-kg2:CHEMBL.COMPOUND:CHEMBL3322001-biolink:affects-MONDO:0002909
infores:rtx-kg2:UniProtKB:P04040-biolink:treats-MONDO: