In [86]:
import json
import warnings

import requests

In [101]:
# Base API URLs of the two ARS deployments used in this notebook.
ARS_CI = "https://ars.ci.transltr.io/ars/api"
ARS_TEST = "https://ars.test.transltr.io/ars/api"

# Query-submission endpoint of each ARS deployment.
ARS_CI_SUBMIT = ARS_CI + "/submit"
ARS_TEST_SUBMIT = ARS_TEST + "/submit"

# ARAGORN query endpoints, for posting a query to ARAGORN directly.
ARAGORN_CI = "https://aragorn.ci.transltr.io/aragorn/query"
ARAGORN_TEST = "https://aragorn.test.transltr.io/aragorn/query"
ARAGORN_DEV = "https://aragorn.renci.org/aragorn/query"

# Node Normalization service root; keeps its trailing slash because callers
# append the endpoint name directly (e.g. f"{NN}get_setid").
NN = "https://nodenormalization-sri.renci.org/"

In [102]:
def get_setid(curie_list, timeout=60):
    """Ask the Node Normalization service for the set id of a list of CURIEs.

    Posts ``curie_list`` (with the ``GeneProtein`` and ``DrugChemical``
    conflations requested) to the ``get_setid`` endpoint under ``NN`` and
    returns the ``setid`` field of the first element of the JSON reply.

    Args:
        curie_list: CURIE strings that make up the set.
        timeout: Seconds ``requests`` waits for the service before giving up,
            so an unresponsive service cannot hang the notebook indefinitely.

    Returns:
        The ``setid`` value reported by the service.

    Raises:
        requests.HTTPError: If the service answers with a 4xx/5xx status.
        requests.Timeout: If no answer arrives within ``timeout`` seconds.
    """
    payload = [
        {
            "curies": curie_list,
            "conflations": [
                "GeneProtein",
                "DrugChemical",
            ],
        }
    ]
    response = requests.post(f"{NN}get_setid", json=payload, timeout=timeout)
    # Fail here with the HTTP status instead of a confusing KeyError/JSON error
    # when the service rejects the request.
    response.raise_for_status()
    return response.json()[0]["setid"]
    

In [103]:
def add_kg(envelope, input_curies, input_node_type, setid):
    """Add the input set node, its member nodes and membership edges to the KG.

    Mutates ``envelope["message"]["knowledge_graph"]`` in place:

    * one node keyed ``setid`` that represents the set (``is_set`` true,
      named ``"set1"``);
    * one node per CURIE in ``input_curies``; these are individual entities,
      so they are flagged ``is_set`` false and named after their own CURIE
      (no label is available here);
    * one ``biolink:member_of`` edge from each CURIE to ``setid``, keyed
      ``<curie with ':' replaced by '_'>_member_of_set1``.

    Args:
        envelope: TRAPI envelope dict that already contains
            ``message.knowledge_graph.nodes`` and ``.edges`` dicts.
        input_curies: CURIEs of the set members.
        input_node_type: Category assigned to the set node and every member.
        setid: Identifier of the set node.

    Returns:
        None; ``envelope`` is modified in place.
    """
    knowledge_graph = envelope["message"]["knowledge_graph"]
    nodes = knowledge_graph["nodes"]
    edges = knowledge_graph["edges"]
    nodes[setid] = {"categories": [input_node_type], "name": "set1", "is_set": True, "attributes": []}
    for curie in input_curies:
        # Only the set node is a set; a member must not reuse its name/flag.
        nodes[curie] = {"categories": [input_node_type], "name": curie, "is_set": False, "attributes": []}
        decolon = curie.replace(":", "_")
        edgename = f"{decolon}_member_of_set1"
        edges[edgename] = {
            "attributes": [],
            "subject": curie,
            "predicate": "biolink:member_of",
            "object": setid,
            "sources": [
                {
                    "resource_id": "infores:user-interface",
                    "resource_role": "primary_knowledge_source",
                }
            ],
        }

def generate_trapi_query(input_node_type,output_node_type,input_curies,predicate,input_is_subject=True):
    """Build a one-edge, inferred TRAPI query whose input node is a set of CURIEs.

    The set id is obtained from ``get_setid(input_curies)``. The query graph
    has an ``input`` node (pinned to the set id, with ``member_ids`` and
    ``set_interpretation`` "MANY"), an unpinned ``output`` node, and a single
    ``edge_0`` between them. ``add_kg`` then fills the knowledge graph with
    the set node, its members and their membership edges.

    Args:
        input_node_type: Category of the input set and its members.
        output_node_type: Category requested for the output node.
        input_curies: CURIEs that make up the input set.
        predicate: Predicate placed on ``edge_0``.
        input_is_subject: When truthy the edge runs input -> output,
            otherwise output -> input.

    Returns:
        The envelope dict, including ``bypass_cache`` and ``parameters``.
    """
    setid = get_setid(input_curies)

    # Decide the orientation of the single query edge up front.
    if input_is_subject:
        edge_subject, edge_object = "input", "output"
    else:
        edge_subject, edge_object = "output", "input"

    envelope = {
        "message": {
            "query_graph": {
                "nodes": {
                    "input": {
                        "categories": [input_node_type],
                        "ids": [setid],
                        "member_ids": input_curies,
                        "set_interpretation": "MANY",
                    },
                    "output": {
                        "categories": [output_node_type],
                    },
                },
                "edges": {
                    "edge_0": {
                        "subject": edge_subject,
                        "object": edge_object,
                        "predicates": [predicate],
                        "knowledge_type": "inferred",
                    },
                },
            },
            "knowledge_graph": {"nodes": {}, "edges": {}},
        }
    }

    # Populate the knowledge graph with the set, its members and their edges.
    add_kg(envelope, input_curies, input_node_type, setid)

    envelope["bypass_cache"] = True
    envelope["parameters"] = {"timeout_seconds": 3600, "kp_timeout": 300}
    return envelope

In [104]:
def generate_phenotypes_to_gene(phenotypes):
    """Build the phenotype-set -> gene TRAPI query.

    Thin wrapper around ``generate_trapi_query`` with the input category
    ``biolink:PhenotypicFeature``, the output category ``biolink:Gene`` and
    the predicate ``biolink:genetically_associated_with``; the phenotype set
    is the subject of the query edge.

    Args:
        phenotypes: CURIEs of the phenotypes forming the input set.

    Returns:
        The envelope dict produced by ``generate_trapi_query``.
    """
    #Phenotype to Gene
    input_type = "biolink:PhenotypicFeature"
    output_type = "biolink:Gene"
    predicate = "biolink:genetically_associated_with"
    # A real boolean: the string "True" only worked because any non-empty
    # string is truthy (so "False" would have behaved identically).
    input_is_subject = True
    return generate_trapi_query(input_type, output_type, phenotypes, predicate, input_is_subject)

In [105]:
def retrieve_ars_results(mid,ars_url='https://ars.transltr.io/ars/api'):
    """Collect the messages of the finished children of an ARS query.

    Fetches ``{ars_url}/messages/{mid}?trace=y`` and walks its ``children``
    list. For each child whose status is ``'Done'`` or ``'Error'`` the child
    message is downloaded from ``{ars_url}/messages/<child message id>``:

    * a ``'Done'`` child is kept only if its message has at least one result;
    * an ``'Error'`` child is kept whenever its message can be read.

    Children in any other status are ignored. Retrieval is best-effort per
    child: a child whose message cannot be fetched or does not have the
    expected shape is skipped with a warning instead of aborting the loop.

    Args:
        mid: Identifier of the parent ARS message (the ``pk`` from submit).
        ars_url: Base API URL of the ARS instance to query.

    Returns:
        Dict mapping each kept child's ``actor.agent`` value to
        ``{'message': <child message>}``.

    Raises:
        requests.HTTPError: If the trace request for ``mid`` itself fails.
    """
    trace_response = requests.get(f'{ars_url}/messages/{mid}?trace=y')
    # Without this, a failed trace lookup shows up as KeyError('children').
    trace_response.raise_for_status()

    results = {}
    for child in trace_response.json()['children']:
        status = child['status']
        if status not in ('Done', 'Error'):
            continue
        childmessage_id = child['message']
        child_url = f'{ars_url}/messages/{childmessage_id}'
        try:
            message = requests.get(child_url).json()['fields']['data']['message']
            # A successful child without results has nothing worth keeping.
            if status == 'Done' and len(message['results']) == 0:
                continue
            results[child['actor']['agent']] = {'message': message}
        except (requests.RequestException, ValueError, KeyError, TypeError) as err:
            # Deliberately best-effort: report the problem and move on.
            warnings.warn(f"Skipping ARS child {childmessage_id}: {err!r}")
    return results

In [106]:
# Input set for the first query: three HPO phenotype ids (labels in the
# trailing comments). Build the phenotype -> gene query and show it as JSON.
tbcd_phenotypes=[
    "HP:0002098", # | Respiratory distress, 
    "HP:0001252", # | Hypotonia, 
    "HP:0001250"  # | Seizure
]
query = generate_phenotypes_to_gene(tbcd_phenotypes)
print( json.dumps( query, indent=2) )

{
  "message": {
    "query_graph": {
      "nodes": {
        "input": {
          "categories": [
            "biolink:PhenotypicFeature"
          ],
          "ids": [
            "uuid:51d8bb77-e107-502a-84bc-15043aff3ab2"
          ],
          "member_ids": [
            "HP:0002098",
            "HP:0001252",
            "HP:0001250"
          ],
          "set_interpretation": "MANY"
        },
        "output": {
          "categories": [
            "biolink:Gene"
          ]
        }
      },
      "edges": {
        "edge_0": {
          "subject": "input",
          "object": "output",
          "predicates": [
            "biolink:genetically_associated_with"
          ],
          "knowledge_type": "inferred"
        }
      }
    },
    "knowledge_graph": {
      "nodes": {
        "uuid:51d8bb77-e107-502a-84bc-15043aff3ab2": {
          "categories": [
            "biolink:PhenotypicFeature"
          ],
          "name": "set1",
          "is_set": true,
          "

In [107]:
# Submit the query to the CI ARS instance. The "pk" in the reply is the message
# id that is later handed to retrieve_ars_results.
response = requests.post( ARS_CI_SUBMIT, json = query )
print(response.status_code)
pk = response.json()["pk"]
print(pk)

201
3ebed631-3ecd-436e-bc07-472d9cc13d3e


In [108]:
def print_results(m):
    """Print each result's output node and the categories of its grouping nodes.

    For every result the index and the name of the node bound to ``output``
    are printed (falling back to the node id when the node has no name).
    Then, for every analysis, the first knowledge-graph edge bound to
    ``edge_0`` is inspected: each support graph listed in its
    ``biolink:support_graphs`` attribute is walked, and for every edge
    endpoint whose id starts with ``uuid`` (other than the input set itself)
    the node's ``categories`` are printed once per analysis. A support graph
    that does not have exactly three edges is flagged with ``direct path?``.
    A line of ``=`` closes each result.

    An edge with no attributes, or without a ``biolink:support_graphs``
    attribute, contributes no category lines instead of raising.

    Args:
        m: Dict with a TRAPI ``message`` (``results``, ``query_graph``,
            ``knowledge_graph`` and, when support graphs are referenced,
            ``auxiliary_graphs``).

    Returns:
        None; everything is written to stdout.
    """
    message = m["message"]
    kg_nodes = message["knowledge_graph"]["nodes"]
    kg_edges = message["knowledge_graph"]["edges"]
    aux_graphs = message.get("auxiliary_graphs") or {}
    input_set_id = message["query_graph"]["nodes"]["input"]["ids"][0]

    for i, result in enumerate(message["results"]):
        result_id = result["node_bindings"]["output"][0]["id"]
        result_name = kg_nodes[result_id].get("name") or result_id
        print(f"{i} {result_name}")
        for analysis in result["analyses"]:
            eid = analysis["edge_bindings"]["edge_0"][0]["id"]
            edge = kg_edges[eid]
            # "attributes" may be absent or null on an edge.
            atts = {att["attribute_type_id"]: att["value"] for att in edge.get("attributes") or []}
            # Edges without support graphs simply have nothing to walk.
            sgs = atts.get("biolink:support_graphs") or []
            used = set()  # grouping nodes already reported for this analysis
            for sg in sgs:
                sg_edges = aux_graphs[sg]["edges"]
                if len(sg_edges) != 3:
                    print("direct path?")
                for sg_eid in sg_edges:
                    sg_edge = kg_edges[sg_eid]
                    for side in ("subject", "object"):
                        nid = sg_edge[side]
                        # Only unseen uuid-style nodes other than the input set.
                        if nid in used or not nid.startswith("uuid") or nid == input_set_id:
                            continue
                        used.add(nid)
                        print(kg_nodes[nid]["categories"])
        print("==========================")

In [109]:
# Fetch the per-agent messages for the submitted query from the CI ARS and
# print the one keyed "ara-aragorn".
# NOTE(review): r["ara-aragorn"] raises KeyError when that agent is not in the
# dict returned by retrieve_ars_results (it only keeps Done/Error children).
r = retrieve_ars_results(pk,ars_url = ARS_CI)
m = r["ara-aragorn"]
print_results(m)

0 HTR1A
['biolink:ChemicalEntity']
1 CDKN1A
['biolink:ChemicalEntity']
2 ABCB1
['biolink:ChemicalEntity']
3 CYP3A5
['biolink:ChemicalEntity']
4 Htr1a
['biolink:ChemicalEntity']
5 CASP8
['biolink:ChemicalEntity']
6 HTR2A
['biolink:ChemicalEntity']
7 CASP7
['biolink:ChemicalEntity']
8 CYP3A4
['biolink:ChemicalEntity']
9 DRD1
['biolink:ChemicalEntity']
10 DDIT4
['biolink:ChemicalEntity']
11 CP3A7_HUMAN Cytochrome P450 3A7 (sprot)
['biolink:ChemicalEntity']
12 JUN
['biolink:ChemicalEntity']
13 HAT1
['biolink:ChemicalEntity']
14 HTR2C
['biolink:ChemicalEntity']
15 ADRA2B
['biolink:ChemicalEntity']
16 PCNA
['biolink:ChemicalEntity']
17 CDC25B
['biolink:ChemicalEntity']
18 UBQLN2
['biolink:ChemicalEntity']
19 ADRA2C
['biolink:ChemicalEntity']
20 STUB1
['biolink:Disease']
['biolink:ChemicalEntity']
21 CDC20
['biolink:ChemicalEntity']
22 CXCL2
['biolink:ChemicalEntity']
23 NUP85
['biolink:ChemicalEntity']
24 RRP8
['biolink:ChemicalEntity']
25 MYC
['biolink:ChemicalEntity']
26 CYP2C9
['biolink:C

In [50]:
# Second input set — Leigh disease? Eight HPO phenotype ids; the label after
# each id is the author's annotation.
leigh_phenotypes = ["HP:0000739", # | Anxiety, 
                    "HP:0001288", # | Gait disturbance, 
                    "HP:0001252", # | Hypotonia, 
                    "HP:0001250", # | Seizure, 
                    "HP:0000750", # | Delayed speech and language development, 
                    "HP:0002378", # | Hand tremor, 
                    "HP:0002019", # | Constipation, 
                    "HP:0007146"] # | Bilateral basal ganglia lesions

In [51]:
# Build the Leigh phenotype query and submit it to the CI ARS through the
# ARS_CI_SUBMIT constant from the configuration cell, so the cell runs on a
# fresh kernel. The "pk" in the reply identifies the submitted message.
query = generate_phenotypes_to_gene(leigh_phenotypes)
response = requests.post( ARS_CI_SUBMIT, json = query )
print(response.status_code)
pk = response.json()["pk"]
print(pk)

201
3be82cd6-b0c3-4cf2-b8dc-751f56c27aaa


In [52]:
# Fetch the per-agent messages from the CI ARS (ARS_CI, defined in the
# configuration cell) and print the one keyed "ara-aragorn".
# NOTE(review): r["ara-aragorn"] raises KeyError when that agent is not in the
# dict returned by retrieve_ars_results.
r = retrieve_ars_results(pk,ars_url = ARS_CI)
m = r["ara-aragorn"]
print_results(m)

0 HTR2A
['biolink:ChemicalEntity']
1 ADRA2A
['biolink:ChemicalEntity']
2 HTR2C
['biolink:ChemicalEntity']
3 ADRA2B
['biolink:ChemicalEntity']
4 HTR6
['biolink:ChemicalEntity']
5 SLC6A4
['biolink:ChemicalEntity']
6 DRD3
['biolink:ChemicalEntity']
7 SIGMAR1
['biolink:ChemicalEntity']
8 DRD1
['biolink:ChemicalEntity']
9 HTR7
['biolink:ChemicalEntity']
10 ADRA1A
['biolink:ChemicalEntity']
11 HTR1D
['biolink:ChemicalEntity']
12 HTR1B
['biolink:ChemicalEntity']
13 CYP3A5
['biolink:ChemicalEntity']
14 ABCB1
['biolink:ChemicalEntity']
15 ADA1B_RAT Alpha-1B adrenergic receptor (sprot)
['biolink:ChemicalEntity']
16 Htr1a
['biolink:ChemicalEntity']
17 CYP1A2
['biolink:ChemicalEntity']
18 NFKBIA
['biolink:ChemicalEntity']
19 CASP7
['biolink:ChemicalEntity']
20 CDC20
['biolink:ChemicalEntity']
21 Adra1a
['biolink:ChemicalEntity']
22 STUB1
['biolink:Disease']
['biolink:ChemicalEntity']
23 CP3A7_HUMAN Cytochrome P450 3A7 (sprot)
['biolink:ChemicalEntity']
24 HAT1
['biolink:ChemicalEntity']
25 HRH2
['

In [110]:
# Let's get some phenotypes: ask the ROBOKOP KG cypher endpoint for HP-prefixed
# DiseaseOrPhenotypicFeature nodes that CHEBI:5613 has a `has_adverse_event`
# edge to but no `treats` edge to, ordered by the edge's FAERS_llr property
# (highest first). Each returned row is [id, name, llr].
cypher = """match (a:`biolink:SmallMolecule` {id:"CHEBI:5613"})-[x:`biolink:has_adverse_event`]->(b:`biolink:DiseaseOrPhenotypicFeature`) 
where not (a)-[:`biolink:treats`]->(b)
and b.id starts with "HP"
return b.id, b.name, x.FAERS_llr as llr order by llr desc"""
robocypher = "https://automat.renci.org/robokopkg/cypher"
response = requests.post(robocypher, json = {"query": cypher} )

In [111]:
# HTTP status of the cypher request made in the previous cell.
response.status_code

200

In [112]:
# Split the cypher rows ([id, name, llr]) into parallel id and name lists,
# decoding the JSON body once.
rows = [entry["row"] for entry in response.json()["results"][0]["data"]]
hps = [row[0] for row in rows]
names = [row[1] for row in rows]

In [113]:
# Number of phenotype ids returned by the cypher query.
len(hps)

126

In [114]:
# Keep the first 10 phenotype ids; the cypher query orders rows by llr, descending.
hplist = hps[:10]

In [116]:
# Build the phenotype -> gene query for hplist and submit it to the CI ARS;
# the "pk" in the reply identifies the submitted message.
query = generate_phenotypes_to_gene(hplist)
response = requests.post( ARS_CI_SUBMIT, json = query )
print(response.status_code)
pk = response.json()["pk"]
print(pk)

201
4a010ad3-a797-4f0a-a8e6-2be447404b77


In [117]:
# Retrieve from the CI ARS (ARS_CI), the same instance the query above was
# submitted to via ARS_CI_SUBMIT, and print the message keyed "ara-aragorn".
# NOTE(review): r["ara-aragorn"] raises KeyError when that agent is not in the
# dict returned by retrieve_ars_results.
r = retrieve_ars_results(pk,ars_url = ARS_CI)
m = r["ara-aragorn"]
print_results(m)

0 HTR2A
direct path?
['biolink:Disease']
['biolink:ChemicalEntity']
1 SLC6A4
['biolink:Disease']
['biolink:ChemicalEntity']
2 ADRA2A
['biolink:ChemicalEntity']
3 HTR7
direct path?
['biolink:Disease']
['biolink:ChemicalEntity']
4 ADRA2B
['biolink:ChemicalEntity']
5 DRD4
['biolink:Disease']
['biolink:ChemicalEntity']
6 HTR6
['biolink:ChemicalEntity']
7 CYP2D6
direct path?
['biolink:Disease']
['biolink:ChemicalEntity']
8 ADA1B_RAT Alpha-1B adrenergic receptor (sprot)
['biolink:ChemicalEntity']
9 HTR1B
['biolink:Disease']
['biolink:ChemicalEntity']
10 ABCB11
['biolink:ChemicalEntity']
11 Adra1a
['biolink:ChemicalEntity']
12 SIGMAR1
['biolink:ChemicalEntity']
13 HTR1D
['biolink:ChemicalEntity']
14 Htr1a
['biolink:ChemicalEntity']
15 DRD3
direct path?
['biolink:Disease']
['biolink:ChemicalEntity']
16 HRH2
['biolink:ChemicalEntity']
17 CASP3
['biolink:ChemicalEntity']
18 CYP3A4
['biolink:ChemicalEntity']
19 DRD2
direct path?
['biolink:Disease']
['biolink:ChemicalEntity']
20 Htr2a
['biolink:Ch

In [73]:
# Widen the input set to the first 30 phenotype ids and post the query straight
# to ARAGORN (not through the ARS), using the ARAGORN_DEV endpoint constant
# from the configuration cell instead of repeating the URL.
hplist = hps[:30]
query = generate_phenotypes_to_gene(hplist)
response = requests.post( ARAGORN_DEV, json = query )
print(response.status_code)

200


In [74]:
# The direct ARAGORN reply is passed to print_results as-is (no ARS lookup).
print_results(response.json())

0 HTR2A
direct path?
['biolink:Disease']
['biolink:ChemicalEntity']
1 HTR2C
['biolink:Disease']
['biolink:ChemicalEntity']
2 ADRA2A
['biolink:Disease']
['biolink:ChemicalEntity']
3 SLC6A4
['biolink:Disease']
['biolink:ChemicalEntity']
4 ADRA2B
['biolink:ChemicalEntity']
5 DRD4
['biolink:Disease']
['biolink:ChemicalEntity']
6 HTR6
['biolink:ChemicalEntity']
7 ABCB1
['biolink:Disease']
['biolink:ChemicalEntity']
8 ABCB11
['biolink:ChemicalEntity']
9 ADA1B_RAT Alpha-1B adrenergic receptor (sprot)
['biolink:ChemicalEntity']
10 CYP2D6
direct path?
['biolink:ChemicalEntity']
11 HTR7
['biolink:ChemicalEntity']
12 HTR1B
['biolink:Disease']
['biolink:ChemicalEntity']
13 SIGMAR1
['biolink:ChemicalEntity']
14 CYP3A4
['biolink:ChemicalEntity']
15 Adra1a
['biolink:ChemicalEntity']
16 CACNA1C
['biolink:Disease']
['biolink:ChemicalEntity']
17 CDC20
['biolink:ChemicalEntity']
18 DRD1
['biolink:Disease']
['biolink:ChemicalEntity']
19 DRD2
direct path?
['biolink:Disease']
['biolink:ChemicalEntity']
20 H