In [2]:
%load_ext autoreload
%load_ext lab_black

%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
The lab_black extension is already loaded. To reload it, use:
  %reload_ext lab_black


In [3]:
import sys

sys.path.append("..")

---

In [17]:
import json

import requests
import pandas as pd
from icecream import ic

In [28]:
epigraphdb_api_url = "https://api.epigraphdb.org"
transformers_url = "http://localhost:8017"
models_api_url = "http://localhost:8016"
neural_api_url = "http://localhost:28015"
for _ in [epigraphdb_api_url, transformers_url, models_api_url, neural_api_url]:
    r = requests.get(f"{_}/ping")
    assert r.json()

---

In [7]:
entity_name = "Rare hyperthyroidism"

In [29]:
r = requests.get(
    f"{neural_api_url}/query/text",
    params={"text": entity_name, "include_meta_nodes": ["Efo"], "limit": 20},
)
r.raise_for_status()
recommender_df = pd.DataFrame(r.json()["results"])
recommender_df

Unnamed: 0,id,name,text,score,meta_node
0,http://www.orpha.net/ORDO/Orphanet_181399,Rare hyperthyroidism,rare hyperthyroidism,1.0,Efo
1,http://www.orpha.net/ORDO/Orphanet_181396,Rare hypothyroidism,rare hypothyroidism,0.938469,Efo
2,http://www.ebi.ac.uk/efo/EFO_0009189,Hyperthyroidism,hyperthyroidism,0.798496,Efo
3,http://www.ebi.ac.uk/efo/EFO_0009190,Thyrotoxicosis,thyrotoxicosis,0.773087,Efo
4,http://purl.obolibrary.org/obo/MONDO_0021136,rare,rare,0.76176,Efo
5,http://www.orpha.net/ORDO/Orphanet_181431,Rare hypolipidemia,rare hypolipidemia,0.753467,Efo
6,http://purl.obolibrary.org/obo/MONDO_0019304,rare photodermatosis,rare photodermatosis,0.743852,Efo
7,http://www.orpha.net/ORDO/Orphanet_181422,Rare hyperlipidemia,rare hyperlipidemia,0.739618,Efo
8,http://www.orpha.net/ORDO/Orphanet_183631,Rare genetic thyroid disease,rare genetic thyroid disease,0.732493,Efo
9,http://purl.obolibrary.org/obo/MONDO_0019859,congenital thyroid malformation without hypoth...,congenital thyroid malformation without hypoth...,0.713052,Efo


In [16]:
# get entity's efo equivalent term, id and name
query = "MATCH (n:Efo) WHERE n._name = '{term}' RETURN n {{._id, ._name}}".format(
    term=entity_name
)
r = requests.post(f"{epigraphdb_api_url}/cypher", json={"query": query})
r.raise_for_status()
ic(r.json())
results = r.json()["results"]

efo_id = results[0]["n"]["_id"]
efo_name = results[0]["n"]["_name"]
ic(efo_id)
ic(efo_name)

ic| r.json(): {'metadata': {'empty_results': False,
                            'query': "MATCH (n:Efo) WHERE n._name = 'Rare hyperthyroidism' "
                                     'RETURN n {._id, ._name}',
                            'total_seconds': 0.008761},
               'results': [{'n': {'_id': 'http://www.orpha.net/ORDO/Orphanet_181399',
                                  '_name': 'Rare hyperthyroidism'}}]}
ic| efo_id: 'http://www.orpha.net/ORDO/Orphanet_181399'
ic| efo_name: 'Rare hyperthyroidism'


'Rare hyperthyroidism'

In [23]:
# get efo term's parents and children
query = """
    MATCH (n:Efo)<-[r:EFO_CHILD_OF]-(parent_efo:Efo)
    WHERE n._id = '{efo_id}' AND n._name = '{efo_name}'
    RETURN parent_efo._id AS efo_id, parent_efo._name AS efo_name
""".format(
    efo_id=efo_id, efo_name=efo_name
)
r = requests.post(f"{epigraphdb_api_url}/cypher", json={"query": query})
r.raise_for_status()
parent_df = pd.DataFrame(r.json()["results"]).assign(node_type="parent")
ic(parent_df)

query = """
    MATCH (n:Efo)-[r:EFO_CHILD_OF]->(child_efo:Efo)
    WHERE n._id = '{efo_id}' AND n._name = '{efo_name}'
    RETURN child_efo._id AS efo_id, child_efo._name AS efo_name
""".format(
    efo_id=efo_id, efo_name=efo_name
)
r = requests.post(f"{epigraphdb_api_url}/cypher", json={"query": query})
r.raise_for_status()
child_df = pd.DataFrame(r.json()["results"]).assign(node_type="child")

efo_df = pd.concat(
    [
        pd.DataFrame([{"efo_id": efo_id, "efo_name": efo_name, "node_type": "self"}]),
        parent_df,
        child_df,
    ]
)
efo_df

ic| parent_df:                                       efo_id                      efo_name  \
               0       http://www.ebi.ac.uk/efo/EFO_0009189               Hyperthyroidism   
               1  http://www.orpha.net/ORDO/Orphanet_183631  Rare genetic thyroid disease   
               
                 node_type  
               0    parent  
               1    parent  


Unnamed: 0,efo_id,efo_name,node_type
0,http://www.orpha.net/ORDO/Orphanet_181399,Rare hyperthyroidism,self
0,http://www.ebi.ac.uk/efo/EFO_0009189,Hyperthyroidism,parent
1,http://www.orpha.net/ORDO/Orphanet_183631,Rare genetic thyroid disease,parent
0,http://www.orpha.net/ORDO/Orphanet_99819,Familial gestational hyperthyroidism,child
1,http://www.orpha.net/ORDO/Orphanet_424,Familial hyperthyroidism due to mutations in T...,child
2,http://www.orpha.net/ORDO/Orphanet_3221,Generalized resistance to thyroid hormone,child
3,http://www.orpha.net/ORDO/Orphanet_165994,Selective pituitary resistance to thyroid hormone,child


In [27]:
# get the cosine similarity between entity's embedding and embeddings of efo terms, by scispacy
r = requests.get(
    f"{models_api_url}/nlp/similarity", params={"text1": "apple", "text2": "orange"}
)
ic(r.json())

semantic_sim_df = (
    efo_df[["efo_name", "node_type"]]
    .assign(entity_term=entity_name)
    .assign(
        cosine_sim=lambda df: df.apply(
            lambda row: requests.get(
                f"{models_api_url}/nlp/similarity",
                params={"text1": row["efo_name"], "text2": row["entity_term"]},
            ).json(),
            axis=1,
        )
    )
)
semantic_sim_df

ic| r.json(): 0.439932479513145


Unnamed: 0,efo_name,node_type,entity_term,cosine_sim
0,Rare hyperthyroidism,self,Rare hyperthyroidism,1.0
0,Hyperthyroidism,parent,Rare hyperthyroidism,0.690695
1,Rare genetic thyroid disease,parent,Rare hyperthyroidism,0.729315
0,Familial gestational hyperthyroidism,child,Rare hyperthyroidism,0.674532
1,Familial hyperthyroidism due to mutations in T...,child,Rare hyperthyroidism,0.585188
2,Generalized resistance to thyroid hormone,child,Rare hyperthyroidism,0.436534
3,Selective pituitary resistance to thyroid hormone,child,Rare hyperthyroidism,0.395431


In [31]:
# sequence classifier
def get_seq_cls_score(text_1, text_2):
    url = f"{transformers_url}/inference"
    r = requests.post(url, json={"text_1": text_1, "text_2": text_2})
    r.raise_for_status()
    res = r.json()
    return res


ic(
    get_seq_cls_score(
        text_1=["body weight", "coronary artery"],
        text_2=["body mass", "coronary heart disease"],
    )
)

seq_cls_df = (
    efo_df[["efo_name", "node_type"]]
    .assign(entity_term=entity_name)
    .assign(
        score=lambda df: get_seq_cls_score(
            text_1=df["efo_name"].tolist(), text_2=df["entity_term"].tolist(),
        )
    )
)
seq_cls_df

ic| get_seq_cls_score(
        text_1=["body weight", "coronary artery"],
        text_2=["body mass", "coronary heart disease"],
    ): [1.1656049489974976, 13.970565795898438]


Unnamed: 0,efo_name,node_type,entity_term,score
0,Rare hyperthyroidism,self,Rare hyperthyroidism,-0.38575
0,Hyperthyroidism,parent,Rare hyperthyroidism,0.278737
1,Rare genetic thyroid disease,parent,Rare hyperthyroidism,1.289101
0,Familial gestational hyperthyroidism,child,Rare hyperthyroidism,2.456133
1,Familial hyperthyroidism due to mutations in T...,child,Rare hyperthyroidism,2.451932
2,Generalized resistance to thyroid hormone,child,Rare hyperthyroidism,3.713221
3,Selective pituitary resistance to thyroid hormone,child,Rare hyperthyroidism,3.945152
