The following functions are used to send queries to Translator and display the results.

In [122]:
#imports
import json
import requests
from datetime import datetime as dt
import urllib.parse
import time
import os

In [123]:
#ARS functions
def submit_to_ars(m,ars_url='https://ars.transltr.io/ars/api',arax_url='https://arax.ncats.io'):
    """Submit a query to the ARS and return the id of the created message.

    Args:
        m: JSON-serialisable query; it is POSTed as the request body.
        ars_url: Base URL of the ARS API; ``/submit`` is appended to it.
        arax_url: Base URL used to build the browser link that is printed
            after a successful submission.

    Returns:
        The value of the ``pk`` field of the JSON response, or ``None`` when
        the response body is not JSON or does not carry a ``pk`` field.
    """
    submit_url=f'{ars_url}/submit'
    response = requests.post(submit_url,json=m)
    try:
        message_id = response.json()['pk']
    except (ValueError, KeyError, TypeError) as err:
        # ValueError: body is not valid JSON.
        # KeyError / TypeError: valid JSON, but no 'pk' entry to read.
        # Anything else (e.g. KeyboardInterrupt) is deliberately not caught.
        print(f'fail: {err!r} (HTTP {response.status_code})')
        return None
    # Only print the link when there is a real id to point at.
    print(f'{arax_url}/?source=ARS&id={message_id}')
    return message_id

def _fetch_child_message(ars_url, child):
    """Download one child response and return its ``fields.data.message`` value."""
    child_url = f"{ars_url}/messages/{child['message']}"
    child_response = requests.get(child_url).json()
    return child_response['fields']['data']['message']


def retrieve_ars_results(mid,ars_url='https://ars.transltr.io/ars/api'):
    """Collect the per-agent messages attached to an ARS message.

    The parent message is fetched with ``?trace=y`` and its ``status`` is
    printed. Each entry of its ``children`` list is then handled by status:

    * ``'Done'``  - the child message is fetched and kept only when its
      ``results`` list is non-empty.
    * ``'Error'`` - the child message is fetched and always kept.
    * anything else - the child is skipped.

    A child that cannot be fetched or parsed is reported on stdout and
    skipped; it never aborts the whole retrieval.

    Args:
        mid: Id of the parent ARS message.
        ars_url: Base URL of the ARS API.

    Returns:
        dict mapping ``child['actor']['agent']`` to ``{'message': <message>}``.
    """
    message_url = f'{ars_url}/messages/{mid}?trace=y'
    response = requests.get(message_url)
    j = response.json()
    print( j['status'] )
    results = {}
    for child in j['children']:
        status = child['status']
        if status not in ('Done', 'Error'):
            continue
        try:
            message = _fetch_child_message(ars_url, child)
            # A finished agent is only of interest when it returned results;
            # the message of a failed agent is kept unconditionally.
            if status == 'Done' and len(message['results']) == 0:
                continue
            results[child['actor']['agent']] = {'message': message}
        except Exception as e:
            # Best effort: report the broken child and carry on with the rest.
            print(f"ARS Error for child {child.get('message')}: {e!r}")
    return results

In [125]:
#utils
def printjson(j):
    """Write ``j`` to standard output as JSON indented by four spaces."""
    rendered = json.dumps(j, indent=4)
    print(rendered)
def print_json(j):
    """Alternative spelling of ``printjson``: forwards ``j`` to it unchanged."""
    printjson(j)

In [126]:
def resolve_name(string):
    """Look up a free-text name with the SRI name-resolution service.

    Args:
        string: The text to look up. It is percent-encoded before being
            placed in the query string.

    Returns:
        The decoded JSON body of the service's response (first 10 matches,
        as requested through ``offset=0&limit=10``).
    """
    # safe='' so that '/', '&', '#', '+' ... inside the name are escaped too
    # and cannot be mistaken for URL syntax.
    url_string=urllib.parse.quote(string, safe='')
    name_resolver_url="https://name-resolution-sri.renci.org/lookup?string="
    # Use the encoded text; interpolating the raw string produced a malformed
    # query for any name containing reserved characters.
    message_url = f'{name_resolver_url}{url_string}&offset=0&limit=10'
    response = requests.post(message_url)
    return response.json()

In [127]:
def getpath_impl(j, fields, i):
    """Walk ``j`` along ``fields[i:]`` and return whatever is found there.

    Starting from ``j``, each key ``fields[i]``, ``fields[i+1]``, ... is
    looked up in turn. The walk stops early and yields ``None`` as soon as
    the current value is ``None`` or does not contain the next key.
    """
    cursor = j
    position = i
    while cursor is not None and position < len(fields):
        key = fields[position]
        if key in cursor:
            cursor = cursor[key]
        else:
            cursor = None
        position += 1
    return cursor

def getpath(j, fields):
    """Follow the keys in ``fields`` through ``j``, starting at the first key.

    Delegates to ``getpath_impl`` with a start index of 0 and returns its
    result, which is ``None`` when any key along the way is missing.
    """
    return getpath_impl(j, fields, 0)

In [128]:
def get_message_from_disk(file="araxResultsSpringHackathon.json"):
    """Load a JSON document from disk.

    Args:
        file: Path of the JSON file to read. Defaults to
            ``araxResultsSpringHackathon.json`` in the working directory.

    Returns:
        The parsed JSON content.
    """
    # Read as UTF-8 explicitly: the platform default encoding is
    # locale-dependent and can mis-decode non-ASCII text on some systems.
    with open(file, 'r', encoding='utf-8') as f:
        data = json.load(f)
    return data

In [129]:
def get_knowledge_graph(message):
    """Return the value at ``fields -> data -> message -> knowledge_graph``.

    Yields ``None`` when ``message`` is ``None`` or any key on that path is
    missing (see ``getpath``).
    """
    return getpath(message,["fields","data","message","knowledge_graph"])

In [130]:
#Returns `nodes` from either a TRAPI knowledge_graph or full response
def get_nodes(json):
    """Return the ``nodes`` entry of a full response or of a bare knowledge graph.

    An input containing a ``"model"`` key is treated as a full message and
    the nodes are read from ``fields -> data -> message -> knowledge_graph``.
    Otherwise an input containing a ``"nodes"`` key is treated as the
    knowledge graph itself. Anything else gives ``None``.
    """
    if "model" in json:
        # Full message: the knowledge graph sits several levels down.
        return getpath(json, ["fields", "data", "message", "knowledge_graph", "nodes"])
    if "nodes" in json:
        # Bare knowledge graph: nodes are a direct entry.
        return getpath(json, ["nodes"])
    return None
        

In [131]:
#Returns `edges` from either a TRAPI knowledge_graph or full response
def get_edges(json):
    """Return the ``edges`` entry of a full response or of a bare knowledge graph.

    An input containing a ``"model"`` key is treated as a full message and
    the edges are read from ``fields -> data -> message -> knowledge_graph``.
    Otherwise an input containing an ``"edges"`` key is treated as the
    knowledge graph itself. Anything else gives ``None``.
    """
    if "model" in json:
        # Full message: the knowledge graph sits several levels down.
        return getpath(json, ["fields", "data", "message", "knowledge_graph", "edges"])
    if "edges" in json:
        # Bare knowledge graph: edges are a direct entry.
        return getpath(json, ["edges"])
    return None

In [132]:
def get_results(json):
    """Return the value at ``fields -> data -> message -> results``.

    Yields ``None`` when ``json`` is ``None`` or any key on that path is
    missing (see ``getpath``).
    """
    return getpath(json,["fields","data","message","results"])

In [178]:
def edge_contains_semmed(edge, source="infores:semmeddb"):
    """Tell whether any attribute of ``edge`` names ``source`` as its source.

    Args:
        edge: Edge object; its ``attributes`` entry is inspected. ``None`` or
            an edge without attributes counts as "no match".
        source: Value that an attribute's ``attribute_source`` must equal.
            Defaults to the SemMedDB identifier ``"infores:semmeddb"``.

    Returns:
        ``True`` if at least one attribute has ``attribute_source == source``,
        otherwise ``False``.
    """
    attributes = getpath(edge,["attributes"])
    if attributes is None:
        return False
    # any() stops at the first matching attribute instead of scanning them all.
    return any(
        "attribute_source" in attribute
        and getpath(attribute,["attribute_source"]) == source
        for attribute in attributes
    )
        

In [200]:
def only_semmed_results(results,edges):
    """Keep the results whose bound edges are all attributed to SemMedDB.

    For every result, each binding listed under ``edge_bindings`` is resolved
    to an edge via ``get_edge_by_id``. Edges whose predicate is
    ``biolink:has_normalized_google_distance_with`` are ignored; every other
    edge must satisfy ``edge_contains_semmed`` for the result to be kept.

    Args:
        results: Iterable of result objects, each with an ``edge_bindings``
            mapping of key -> list of bindings carrying an ``id``.
        edges: Mapping of edge id -> edge object.

    Returns:
        List of the results that passed, in their original order.
    """
    ignored_predicate = "biolink:has_normalized_google_distance_with"
    kept = []
    for candidate in results:
        edge_bindings = getpath(candidate, ['edge_bindings'])
        non_semmed_found = False
        for bound_list in edge_bindings.values():
            for bound in bound_list:
                bound_edge = get_edge_by_id(edges, getpath(bound, ['id']))
                if getpath(bound_edge, ['predicate']) == ignored_predicate:
                    # Computed-distance edges do not count for or against.
                    continue
                if not edge_contains_semmed(bound_edge):
                    non_semmed_found = True
        if not non_semmed_found:
            kept.append(candidate)
    return kept
            

In [137]:
def get_edge_by_id(edges,edge_id):
    """Return ``edges[edge_id]``, or ``None`` if ``edges`` is ``None`` or lacks that id."""
    return(getpath(edges,[edge_id]))

In [139]:
def get_node_by_id(nodes, node_id):
    """Return ``nodes[node_id]``, or ``None`` if ``nodes`` is ``None`` or lacks that id."""
    return(getpath(nodes,[node_id]))

In [142]:
def get_node_ids_from_edge(edge):
    """Return ``[subject, object]`` of ``edge``; a missing entry appears as ``None``."""
    subject_id = getpath(edge, ['subject'])
    object_id = getpath(edge, ['object'])
    return [subject_id, object_id]

In [148]:
def get_categories_from_node(node):
    """Return the ``categories`` entry of ``node``, or ``None`` if the node is ``None`` or has none."""
    return getpath(node,['categories'])

In [153]:
def get_category_count(edges,nodes):
    """Count how often each node category appears at the ends of ``edges``.

    Every edge contributes its subject node and its object node; each
    category listed on such a node is counted once per appearance.

    Args:
        edges: Iterable of edge objects (each is read for ``subject`` and
            ``object``).
        nodes: Mapping of node id -> node object.

    Returns:
        dict mapping category -> number of occurrences. Endpoints whose node
        is unknown, or whose node has no categories, contribute nothing.
    """
    cat_map={}
    for edge in edges:
        for nid in get_node_ids_from_edge(edge):
            categories = get_categories_from_node(get_node_by_id(nodes,nid))
            # `categories` is None when the node id is not in `nodes` or the
            # node carries no categories; treat that as an empty list rather
            # than failing on iteration.
            for cat in categories or ():
                cat_map[cat] = cat_map.get(cat, 0) + 1
    return cat_map

In [156]:
def get_predicate_count(edges):
    """Count how many of ``edges`` carry each ``predicate`` value.

    Edges without a predicate are tallied under the key ``None``.
    """
    tally = {}
    for item in edges:
        label = getpath(item, ['predicate'])
        tally[label] = tally.get(label, 0) + 1
    return tally

In [163]:
def analyze(json):
    """Print a short SemMedDB summary for a response or knowledge graph.

    Prints, in order: how many edges satisfy ``edge_contains_semmed`` out of
    the total, the category tally of the nodes on those edges, and the
    predicate tally of those edges. Returns nothing.
    """
    all_edges = get_edges(json)
    all_nodes = get_nodes(json)
    edge_total = len(all_edges.keys())
    from_semmed = [
        all_edges[edge_id]
        for edge_id in all_edges.keys()
        if edge_contains_semmed(all_edges[edge_id])
    ]
    print(f"{len(from_semmed)} of {edge_total} edges came from semmed")
    category_counts = get_category_count(from_semmed, all_nodes)
    predicate_counts = get_predicate_count(from_semmed)
    print(category_counts)
    print(predicate_counts)

In [164]:
# Load the saved example response (default file name) and pull out its knowledge graph.
example = get_message_from_disk()
kg = get_knowledge_graph(example)

In [165]:
# Extract the edges, nodes and results of the example for the cells that follow.
edges = get_edges(example)
nodes = get_nodes(example)
results = get_results(example)

In [168]:
# Print the SemMedDB edge count plus category and predicate tallies for the example.
analyze(example)

967 of 1458 edges came from semmed
{'biolink:SmallMolecule': 967, 'biolink:PhenotypicFeature': 967}
{'biolink:treats': 293, 'biolink:prevents': 212, 'biolink:affects': 217, 'biolink:disrupts': 132, 'biolink:entity_positively_regulates_entity': 78, 'biolink:has_normalized_google_distance_with': 35}


In [206]:
# Results whose bound edges all come from SemMedDB, and how many there are.
only_sem = only_semmed_results(results, edges)
print(f"{len(only_sem)} of {len(results)} results have only semmeddb as a source")


474 of 500 results have only semmeddb as a source
